diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 0c0781095685db5f1f4236e5a35ab56be99df3ba..1812e6e72b9b4de7934b9af20e43e9e91bcea1e7 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -1773,7 +1773,11 @@ void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const Comp } #if JVET_V0130_INTRA_TMP +#if JVET_W0069_TMP_BOUNDARY +RefTemplateType IntraPrediction::GetRefTemplateType(CodingUnit& cu, CompArea& area) // reports which neighbouring-sample template shape is usable for this block +#else bool IntraPrediction::isRefTemplateAvailable(CodingUnit& cu, CompArea& area) +#endif { const ChannelType chType = toChannelType(area.compID); const CodingStructure& cs = *cu.cs; @@ -1799,7 +1803,11 @@ bool IntraPrediction::isRefTemplateAvailable(CodingUnit& cu, CompArea& area) if( numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0 ) { +#if JVET_W0069_TMP_BOUNDARY + return No_Template; +#else return false; +#endif } // ----- Step 1: analyze neighborhood ----- @@ -1814,7 +1822,19 @@ bool IntraPrediction::isRefTemplateAvailable(CodingUnit& cu, CompArea& area) //bool retVal = 1; +#if JVET_W0069_TMP_BOUNDARY // prefer the full L-shape, then a one-sided template, then none + if (isAboveLeftAvailable(cu, chType, posLT) && isAboveAvailable(cu, chType, posLT, numAboveUnits, unitWidth, (neighborFlags + totalLeftUnits + 1)) && isLeftAvailable(cu, chType, posLT, numLeftUnits, unitHeight, (neighborFlags + totalLeftUnits - 1))) + return L_Shape_Template; + else if (isLeftAvailable(cu, chType, posLT, numLeftUnits, unitHeight, (neighborFlags + totalLeftUnits - 1))) // a left-only template reads the left neighbours, so test those instead of the above-left corner + return Left_Template; + else if (isAboveAvailable(cu, chType, posLT, numAboveUnits, unitWidth, (neighborFlags + totalLeftUnits + 1))) + return Up_Template; + else + return No_Template; + CHECK(1, "un defined template type"); +#else return isAboveLeftAvailable(cu, chType, posLT) && isAboveAvailable(cu, chType, posLT, numAboveUnits, unitWidth, (neighborFlags + totalLeftUnits + 1)) && isLeftAvailable(cu, chType, posLT, numLeftUnits, unitHeight, (neighborFlags +
totalLeftUnits - 1)); +#endif //return retVal; } diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index 9cf085f4b8859084f5d38f22a552f6e4dca7d033..b132622a284f45e968aa649af8ee7486ec988b9a 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -158,7 +158,7 @@ protected: void xPredIntraBDPCM ( const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng ); Pel xGetPredValDc ( const CPelBuf &pSrc, const Size &dstSize ); -#if JVET_V0130_INTRA_TMP +#if JVET_V0130_INTRA_TMP && !JVET_W0069_TMP_BOUNDARY bool isRefTemplateAvailable(CodingUnit& cu, CompArea& area); #endif @@ -203,6 +203,10 @@ public: IntraPrediction(); virtual ~IntraPrediction(); +#if JVET_W0069_TMP_BOUNDARY + RefTemplateType GetRefTemplateType(CodingUnit& cu, CompArea& area); +#endif + void init (ChromaFormat chromaFormatIDC, const unsigned bitDepthY); #if ENABLE_DIMD static void deriveDimdMode (const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu); diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index 40d9dea86175ead1382cc38b0383f967be0025a9..548f8cc34819fedee563a6957f02a97ac21e7059 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -542,7 +542,11 @@ void TempLibFast::initTemplateDiff(unsigned int uiPatchWidth, unsigned int uiPat } } +#if JVET_W0069_TMP_BOUNDARY +void TrQuant::getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType TempType) +#else void TrQuant::getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight) +#endif { const ComponentID compID = COMPONENT_Y; unsigned int uiPatchWidth = uiBlkWidth + TMP_TEMPLATE_SIZE; @@ -557,6 +561,10 @@ void TrQuant::getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsig //fill template //up-left & up Pel* tarTemp; +#if JVET_W0069_TMP_BOUNDARY + if (TempType == L_Shape_Template) + { +#endif 
Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride - TMP_TEMPLATE_SIZE; for (uiY = 0; uiY < TMP_TEMPLATE_SIZE; uiY++) { @@ -577,9 +585,42 @@ void TrQuant::getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsig } pCurrTemp += uiPicStride; } +#if JVET_W0069_TMP_BOUNDARY + } + else if (TempType == Up_Template) + { + Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE * uiPicStride; + for (uiY = 0; uiY < TMP_TEMPLATE_SIZE; uiY++) + { + tarTemp = tarPatch[uiY]; + for (uiX = 0; uiX < uiBlkWidth; uiX++) + { + tarTemp[uiX] = pCurrTemp[uiX]; + } + pCurrTemp += uiPicStride; + } + } + else if (TempType == Left_Template) + { + Pel* pCurrTemp = pCurrStart - TMP_TEMPLATE_SIZE; + for (uiY = TMP_TEMPLATE_SIZE; uiY < uiPatchHeight; uiY++) + { + tarTemp = tarPatch[uiY]; + for (uiX = 0; uiX < TMP_TEMPLATE_SIZE; uiX++) + { + tarTemp[uiX] = pCurrTemp[uiX]; + } + pCurrTemp += uiPicStride; + } + } +#endif } +#if JVET_W0069_TMP_BOUNDARY +void TrQuant::candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType TempType) +#else void TrQuant::candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight) +#endif { const ComponentID compID = COMPONENT_Y; const int channelBitDepth = pcCU->cs->sps->getBitDepth(toChannelType(compID)); @@ -590,7 +631,11 @@ void TrQuant::candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, un //Initialize the library for saving the best candidates m_tempLibFast.initTemplateDiff(uiPatchWidth, uiPatchHeight, uiBlkWidth, uiBlkHeight, channelBitDepth); short setId = 0; //record the reference picture. 
+#if JVET_W0069_TMP_BOUNDARY + searchCandidateFromOnePicIntra(pcCU, tarPatch, uiPatchWidth, uiPatchHeight, setId, TempType); +#else searchCandidateFromOnePicIntra(pcCU, tarPatch, uiPatchWidth, uiPatchHeight, setId); +#endif //count collected candidate number int pDiff = m_tempLibFast.getDiff(); int maxDiff = m_tempLibFast.getDiffMax(); @@ -606,7 +651,11 @@ void TrQuant::candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, un } } +#if JVET_W0069_TMP_BOUNDARY +void TrQuant::searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId, RefTemplateType TempType) +#else void TrQuant::searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId) +#endif { const ComponentID compID = COMPONENT_Y; unsigned int uiBlkWidth = uiPatchWidth - TMP_TEMPLATE_SIZE; @@ -685,7 +734,11 @@ void TrQuant::searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset--) { refCurr = ref + iYOffset * refStride + iXOffset; +#if JVET_W0069_TMP_BOUNDARY + diff = m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff, TempType); +#else diff = m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff); +#endif if (diff < (pDiff)) { insertNode(diff, iXOffset, iYOffset, pDiff, pX, pY, pId, setId); @@ -719,7 +772,11 @@ void TrQuant::searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset--) { refCurr = ref + iYOffset * refStride + iXOffset; +#if JVET_W0069_TMP_BOUNDARY + diff = m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff, TempType); +#else diff = m_calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff); +#endif if (diff < (pDiff)) { @@ -780,12 +837,48 @@ bool TrQuant::generateTMPrediction(Pel* 
piPred, unsigned int uiStride, unsigned return bSucceedFlag; } +#if JVET_W0069_TMP_BOUNDARY +bool TrQuant::generateTM_DC_Prediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int DC_Val) +{ + bool bSucceedFlag = true; + { + for (unsigned int uiY = 0; uiY < uiBlkHeight; uiY++) + { + for (unsigned int uiX = 0; uiX < uiBlkWidth; uiX++) + { + piPred[uiX] = DC_Val; + } + piPred += uiStride; + } + } + return bSucceedFlag; +} +#endif + +#if JVET_W0069_TMP_BOUNDARY +int TrQuant::calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType) // sum of absolute differences over the template shape selected by TempType; returns early once the sum exceeds iMax +#else int TrQuant::calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax) +#endif { int iDiffSum = 0; +#if JVET_W0069_TMP_BOUNDARY + Pel* refPatchRow = nullptr; // stays null when TempType selects no template (e.g. No_Template); no branch below reads it in that case + if (TempType == L_Shape_Template) + refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; + else if (TempType == Left_Template) + refPatchRow = ref - TMP_TEMPLATE_SIZE; + else if (TempType == Up_Template) + refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride; +#else Pel* refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; +#endif Pel* tarPatchRow; +#if JVET_W0069_TMP_BOUNDARY + if (TempType == L_Shape_Template) + { +#endif // horizontal difference for( int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++ ) { @@ -815,6 +908,43 @@ int TrQuant::calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, u } refPatchRow += uiStride; } +#if JVET_W0069_TMP_BOUNDARY + } + else if (TempType == Up_Template) + { + // top template difference + for (int iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) + { + tarPatchRow = tarPatch[iY]; + for (int iX = 0; iX < uiPatchWidth - TMP_TEMPLATE_SIZE; iX++) + { + iDiffSum += abs(refPatchRow[iX] - tarPatchRow[iX]); + } + if (iDiffSum > iMax) //for speeding up + { + return iDiffSum; + } + refPatchRow += uiStride; + } + } + else if
(TempType == Left_Template) + { + // left template difference + for (int iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) + { + tarPatchRow = tarPatch[iY]; + for (int iX = 0; iX < TMP_TEMPLATE_SIZE; iX++) + { + iDiffSum += abs(refPatchRow[iX] - tarPatchRow[iX]); + } + if (iDiffSum > iMax) //for speeding up + { + return iDiffSum; + } + refPatchRow += uiStride; + } + } +#endif return iDiffSum; } diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h index 4145f64047924ac5623726cdac87aa9c71b32a10..ec9b42c35f72db6814722650e3c3749e222ec066 100644 --- a/source/Lib/CommonLib/TrQuant.h +++ b/source/Lib/CommonLib/TrQuant.h @@ -130,18 +130,35 @@ public: void invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ); #endif #if JVET_V0130_INTRA_TMP +#if JVET_W0069_TMP_BOUNDARY + int (*m_calcTemplateDiff)(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType); + static int calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType); +#else int ( *m_calcTemplateDiff )(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax); static int calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax); +#endif Pel** getTargetPatch(unsigned int uiDepth) { return m_pppTarPatch[uiDepth]; } Pel* getRefPicUsed() { return m_refPicUsed; } void setRefPicUsed(Pel* ref) { m_refPicUsed = ref; } unsigned int getStride() { return m_uiPicStride; } void setStride(unsigned int uiPicStride) { m_uiPicStride = uiPicStride; } +#if JVET_W0069_TMP_BOUNDARY + void searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId, RefTemplateType 
TempType); + void candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType TempType); +#else void searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId); void candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight); +#endif bool generateTMPrediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int& foundCandiNum); +#if JVET_W0069_TMP_BOUNDARY + bool generateTM_DC_Prediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int DC_Val); +#endif +#if JVET_W0069_TMP_BOUNDARY + void getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight, RefTemplateType TempType); +#else void getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight); +#endif #endif uint32_t getLFNSTIntraMode( int wideAngPredMode ); diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 9762eb814ab022abc8a4301133a8cde2e110baa6..8f49a782e0cc37885e23a1267b5f4e7236210fbf 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -111,6 +111,7 @@ #define ENABLE_DIMD 1 // Decoder side intra mode derivation #define JVET_V0087_DIMD_NO_ISP ENABLE_DIMD // disallow combination of DIMD and ISP #define JVET_V0130_INTRA_TMP 1 // JVET-V0130: template matching prediction +#define JVET_W0069_TMP_BOUNDARY 1 // JVET-W0069: TMP with an above-only or left-only template, and a constant fill when no template is usable #define JVET_W0123_TIMD_FUSION 1 // Template based intra mode derivation and fusion @@ -656,6 +657,15 @@ enum ChannelType CHANNEL_TYPE_CHROMA = 1, MAX_NUM_CHANNEL_TYPE = 2 }; +#if JVET_W0069_TMP_BOUNDARY +enum RefTemplateType // shape of the neighbouring-sample template used by intra TMP +{ + L_Shape_Template = 1, // rows above and columns left of the block, joined at the above-left corner + Left_Template = 2, // only the TMP_TEMPLATE_SIZE columns left of the block + Up_Template = 3, // only the TMP_TEMPLATE_SIZE rows above the block + No_Template = 4, // nothing usable: callers skip the search and fill the block with the constant 1<<(bitDepth-1) +}; +#endif #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS enum TreeType { diff --git a/source/Lib/CommonLib/x86/TrQuantX86.h
b/source/Lib/CommonLib/x86/TrQuantX86.h index 7404f8fdc78506b73bbcaca8c743dda9447e090e..c560f9fbb46da8ee56874d6a3705e54df53f7b35 100644 --- a/source/Lib/CommonLib/x86/TrQuantX86.h +++ b/source/Lib/CommonLib/x86/TrQuantX86.h @@ -412,15 +412,33 @@ uint32_t computeSAD_SIMD( const Pel* ref, const Pel* cur, const int size ) #if ENABLE_SIMD_TMP template< X86_VEXT vext > +#if JVET_W0069_TMP_BOUNDARY +int calcTemplateDiffSIMD(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax, RefTemplateType TempType) // SIMD sum of absolute differences over the template shape selected by TempType; returns early once the sum exceeds iMax +#else int calcTemplateDiffSIMD( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, int iMax ) +#endif { int iDiffSum = 0; int iY; +#if JVET_W0069_TMP_BOUNDARY + Pel* refPatchRow = nullptr; // stays null when TempType selects no template (e.g. No_Template); no branch below reads it in that case + if (TempType == L_Shape_Template) + refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; + else if (TempType == Left_Template) + refPatchRow = ref - TMP_TEMPLATE_SIZE; + else if (TempType == Up_Template) + refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride; +#else Pel* refPatchRow = ref - TMP_TEMPLATE_SIZE * uiStride - TMP_TEMPLATE_SIZE; +#endif Pel* tarPatchRow; uint32_t uiSum; // horizontal difference +#if JVET_W0069_TMP_BOUNDARY + if (TempType == L_Shape_Template) + { +#endif for( iY = 0; iY < TMP_TEMPLATE_SIZE; iY++ ) { tarPatchRow = tarPatch[iY]; @@ -527,6 +545,124 @@ int calcTemplateDiffSIMD( Pel* ref, unsigned int uiStride, Pel** tarPatch, unsig // update location refPatchRow += uiStride; } +#if JVET_W0069_TMP_BOUNDARY + } + else if (TempType == Up_Template) + { + // top template difference (rows above the block) + for (iY = 0; iY < TMP_TEMPLATE_SIZE; iY++) + { + tarPatchRow = tarPatch[iY]; + const short* pSrc1 = (const short*)tarPatchRow; + const short* pSrc2 = (const short*)refPatchRow; + + // SIMD difference + //int iRows = uiPatchHeight; + int iCols = uiPatchWidth - TMP_TEMPLATE_SIZE; + if ((iCols & 7) == 0) + { + // Do with step of 8 + __m128i vzero = _mm_setzero_si128(); + __m128i
vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for (int iX = 0; iX < iCols; iX += 8) + { + __m128i vsrc1 = _mm_loadu_si128((const __m128i*)(&pSrc1[iX])); + __m128i vsrc2 = _mm_lddqu_si128((const __m128i*)(&pSrc2[iX])); + vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + uiSum = _mm_cvtsi128_si32(vsum32); + } + else + { + // Do with step of 4 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for (int iX = 0; iX < iCols; iX += 4) + { + __m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]); + __m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]); + vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + uiSum = _mm_cvtsi128_si32(vsum32); + } + iDiffSum += uiSum; + + if (iDiffSum > iMax) //for speeding up + { + return iDiffSum; + } + // update location + refPatchRow += uiStride; + } + + + } + else if (TempType == Left_Template) + { + + // vertical difference + int iCols = TMP_TEMPLATE_SIZE; + + for (iY = TMP_TEMPLATE_SIZE; iY < uiPatchHeight; iY++) + { + tarPatchRow = tarPatch[iY]; + const short* pSrc1 = (const short*)tarPatchRow; + const 
short* pSrc2 = (const short*)refPatchRow; + + // SIMD difference + + // Do with step of 4 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for (int iX = 0; iX < iCols; iX += 4) + { + __m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]); + __m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]); + vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + uiSum = _mm_cvtsi128_si32(vsum32); + + iDiffSum += uiSum; + + if (iDiffSum > iMax) //for speeding up + { + return iDiffSum; + } + // update location + refPatchRow += uiStride; + } + } +#endif return iDiffSum; } diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 59f5ca00eae3380566c8f06d1968edd7244ea708..27aa5ae24fb9cdd8b59ab5c8f01dd590b2266ef3 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -384,9 +384,24 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) if (PU::isTmp(pu, chType)) { int foundCandiNum; +#if JVET_W0069_TMP_BOUNDARY + RefTemplateType TempType = m_pcIntraPred->GetRefTemplateType(*(tu.cu), tu.cu->blocks[COMPONENT_Y]); + if (TempType != No_Template) + { + m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight(), TempType); + m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight(), TempType); + m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); + } + else + { + foundCandiNum = 1; + m_pcTrQuant->generateTM_DC_Prediction(piPred.buf, piPred.stride, 
pu.lwidth(), pu.lheight(), 1 << (tu.cu->cs->sps->getBitDepth(CHANNEL_TYPE_LUMA) - 1)); + } +#else m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight()); m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight()); m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); +#endif assert(foundCandiNum >= 1); } else if (PU::isMIP(pu, chType)) @@ -585,7 +600,7 @@ void DecCu::xIntraRecACTBlk(TransformUnit& tu) PelBuf piPred = cs.getPredBuf(area); m_pcIntraPred->initIntraPatternChType(*tu.cu, area); -#if JVET_V0130_INTRA_TMP +#if JVET_V0130_INTRA_TMP && ! JVET_W0069_TMP_BOUNDARY if (PU::isTmp(pu, chType)) { int foundCandiNum; diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 821eca75970bf218bee449080e33d97af1dbdc84..ac3df4bf2bcbfc44388c7ae22ecab14969c5c18b 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -891,12 +891,30 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c bool bsuccessfull = 0; CodingUnit cu_cpy = cu; +#if JVET_W0069_TMP_BOUNDARY + RefTemplateType TemplateType = GetRefTemplateType(cu_cpy, cu_cpy.blocks[COMPONENT_Y]); + if (TemplateType != No_Template) +#else if( isRefTemplateAvailable( cu_cpy, cu_cpy.blocks[COMPONENT_Y] ) ) +#endif { +#if JVET_W0069_TMP_BOUNDARY + m_pcTrQuant->getTargetTemplate(&cu_cpy, pu.lwidth(), pu.lheight(), TemplateType); + m_pcTrQuant->candidateSearchIntra(&cu_cpy, pu.lwidth(), pu.lheight(), TemplateType); + bsuccessfull = m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); +#else m_pcTrQuant->getTargetTemplate( &cu_cpy, pu.lwidth(), pu.lheight() ); m_pcTrQuant->candidateSearchIntra( &cu_cpy, pu.lwidth(), pu.lheight() ); bsuccessfull = m_pcTrQuant->generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); +#endif } +#if JVET_W0069_TMP_BOUNDARY + else + { + 
foundCandiNum = 1; + bsuccessfull = m_pcTrQuant->generateTM_DC_Prediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), 1 << (cu_cpy.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA) - 1)); + } +#endif if( bsuccessfull && foundCandiNum >= 1 ) { @@ -3660,9 +3678,24 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp if( PU::isTmp( pu, chType ) ) { int foundCandiNum; +#if JVET_W0069_TMP_BOUNDARY + RefTemplateType TempType = GetRefTemplateType(*(tu.cu), tu.cu->blocks[COMPONENT_Y]); + if (TempType != No_Template) + { + m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight(), TempType); + m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight(), TempType); + m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); + } + else + { + foundCandiNum = 1; + m_pcTrQuant->generateTM_DC_Prediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), 1 << (tu.cu->cs->sps->getBitDepth(CHANNEL_TYPE_LUMA) - 1)); + } +#else m_pcTrQuant->getTargetTemplate( tu.cu, pu.lwidth(), pu.lheight() ); m_pcTrQuant->candidateSearchIntra( tu.cu, pu.lwidth(), pu.lheight() ); m_pcTrQuant->generateTMPrediction( piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum ); +#endif CHECK( foundCandiNum < 1, "" ); } else if( PU::isMIP( pu, chType ) ) @@ -4922,7 +4955,7 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti PelBuf piResi = resiBuf.bufs[compID]; initIntraPatternChType(*tu.cu, area); -#if JVET_V0130_INTRA_TMP +#if JVET_V0130_INTRA_TMP && !JVET_W0069_TMP_BOUNDARY if( PU::isTmp( pu, chType ) ) { int foundCandiNum;