diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index 7ae48e0eee59ca7dc35c0dbd3b94dc9155a361c1..a56cbd4c749e6bbaea39c863fc5635dfed397f19 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -788,6 +788,10 @@ void EncApp::xInitLibCfg() m_cEncLib.setUseWrapAround ( m_wrapAround ); m_cEncLib.setWrapAroundOffset ( m_wrapAroundOffset ); +#if IDCC_TPM_JEM + m_cEncLib.setUseIntraTMP(m_IntraTMP); + m_cEncLib.setIntraTMPMaxSize(m_IntraTMP_MaxSize); +#endif #if ERICSSON_BIF m_cEncLib.setUseBIF ( m_BIF ); m_cEncLib.setBIFStrength ( m_BIFStrength ); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 68d9004719c2221b31f795f23f4aefdc2cff4211..bcfca9603ea36deba5ce19617c870b39fc173806 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -1044,12 +1044,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("AdditionalInterHypRefFrames", m_maxNumAddHypRefFrames, 4, "max. 
number of ref frames for additional inter hypotheseis") ("AdditionalInterHypTries", m_addHypTries, 1, "number of tries for additional inter prediction hypotheseis") #endif +#if IDCC_TPM_JEM + ("IntraTMP", m_IntraTMP, false, "intra Template Matching (0: off, 1:on) [default: off]") + ("IntraTMPMaxSize", m_IntraTMP_MaxSize, 64u, "intra Template Matching max CU size [default: 64]") +#endif #if ERICSSON_BIF - ("BIF", m_BIF, false, "bilateral filter (0: off, 1:on) [default: off]") + ("BIF", m_BIF, true, "bilateral filter (0: off, 1:on) [default: on]") ("BIFStrength", m_BIFStrength, 1u, "bilateral filter strength (0: half, 1: full, 2: double) [default: full]") ("BIFQPOffset", m_BIFQPOffset, 0, "bilateral filter QP offset (0: no offset) [default: 0]") #endif - // ADD_NEW_TOOL : (encoder app) add parsing parameters here ( "VirtualBoundariesPresentInSPSFlag", m_virtualBoundariesPresentFlag, true, "Virtual Boundary position information is signalled in SPS or PH (1:SPS, 0:PH) [default: on]" ) ("NumVerVirtualBoundaries", m_numVerVirtualBoundaries, 0u, "Number of vertical virtual boundaries (0-3, inclusive)") @@ -4173,6 +4176,17 @@ void EncAppCfg::xPrintParameter() } #endif } +#if IDCC_TPM_JEM + msg(DETAILS, "Intra TMP: %d\n", m_IntraTMP); + msg(DETAILS, "Max CU size of TMP: %d\n", m_IntraTMP_MaxSize); +#if IDCC_FixedComparisonPerPixel + msg(DETAILS, "dynamic search range with fixed comparison per pixel: \n"); + msg(DETAILS, " searchRangeWidth = %d*Width \n", IDCC_SearchRangeMultFactor); + msg(DETAILS, " searchRangeHeight = %d*Height \n", IDCC_SearchRangeMultFactor); +#else + msg(DETAILS, "search range: %d\n", IDCC_SEARCHRANGEINTRA); +#endif +#endif msg( DETAILS, "Max Num Merge Candidates : %d\n", m_maxNumMergeCand ); msg( DETAILS, "Max Num Affine Merge Candidates : %d\n", m_maxNumAffineMergeCand ); @@ -4306,6 +4320,10 @@ void EncAppCfg::xPrintParameter() { msg( VERBOSE, "WrapAroundOffset:%d ", m_wrapAroundOffset ); } +#if IDCC_TPM_JEM + msg( VERBOSE, "IntraTMP:%d ", m_IntraTMP); 
+ msg( VERBOSE, "IntraTMP_MaxSize:%d ", m_IntraTMP_MaxSize); +#endif #if ERICSSON_BIF msg( VERBOSE, "BIF:%d ", m_BIF); msg( VERBOSE, "BIFStrength:%d ", m_BIFStrength); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 69abf35de5ed53d22ab7147ee4ba3e72a7dfa945..cc0289f8d3fa427b8ee4d562d6f882419eb5ceeb 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -411,6 +411,10 @@ protected: int m_maxNumAddHypRefFrames; ///< max. number of ref frames for additional inter hypotheseis int m_addHypTries; ///< max. number of tries for additional inter hypotheseis #endif +#if IDCC_TPM_JEM + bool m_IntraTMP; ///< intra Template Matching + unsigned m_IntraTMP_MaxSize; ///< max CU size for which intra TMP is allowed +#endif #if ERICSSON_BIF bool m_BIF; ///< bilateral filter unsigned m_BIFStrength; /// Bilateral filter strength diff --git a/source/App/Parcat/parcat.cpp b/source/App/Parcat/parcat.cpp index c19b982e04c947fd115232c40842ae193492d420..a40fc93a23a684be02840da74a940e10252a59e8 100644 --- a/source/App/Parcat/parcat.cpp +++ b/source/App/Parcat/parcat.cpp @@ -52,9 +52,8 @@ class ParcatHLSyntaxReader : public VLCReader bool parsePictureHeaderInSliceHeaderFlag ( ParameterSetManager *parameterSetManager ); }; -bool ParcatHLSyntaxReader::parsePictureHeaderInSliceHeaderFlag(ParameterSetManager *parameterSetManager) { - - +bool ParcatHLSyntaxReader::parsePictureHeaderInSliceHeaderFlag(ParameterSetManager *parameterSetManager) +{ uint32_t uiCode; READ_FLAG(uiCode, "picture_header_in_slice_header_flag"); return (uiCode==1); @@ -172,7 +171,7 @@ const char * NALU_TYPE[] = "NAL_UNIT_CODED_SLICE_GDR", "NAL_UNIT_RESERVED_IRAP_VCL11", "NAL_UNIT_RESERVED_IRAP_VCL12", - "NAL_UNIT_DPS", + "NAL_UNIT_DCI", "NAL_UNIT_VPS", "NAL_UNIT_SPS", "NAL_UNIT_PPS", @@ -195,8 +194,8 @@ const char * NALU_TYPE[] = int calc_poc(int iPOClsb, int prevTid0POC, int getBitsForPOC, int nalu_type) { - int iPrevPOC = prevTid0POC; - int iMaxPOClsb 
= 1<< getBitsForPOC; + int iPrevPOC = prevTid0POC; + int iMaxPOClsb = 1<< getBitsForPOC; int iPrevPOClsb = iPrevPOC & (iMaxPOClsb - 1); int iPrevPOCmsb = iPrevPOC-iPrevPOClsb; int iPOCmsb; @@ -218,9 +217,9 @@ int calc_poc(int iPOClsb, int prevTid0POC, int getBitsForPOC, int nalu_type) std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int * poc_base, int * last_idr_poc) { - const uint8_t * p = v.data(); + const uint8_t * p = v.data(); const uint8_t * buf = v.data(); - int sz = (int) v.size(); + int sz = (int) v.size(); int nal_start, nal_end; int off = 0; int cnt = 0; @@ -229,9 +228,9 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int std::vector<uint8_t> out; out.reserve(v.size()); - int bits_for_poc = 8; + int bits_for_poc = 8; bool skip_next_sei = false; - bool change_poc = false; + bool change_poc = false; bool first_idr_slice_after_ph_nal = false; while(find_nal_unit(p, sz, &nal_start, &nal_end) > 0) @@ -252,7 +251,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int #if ENABLE_TRACING printf ("NALU Type: %d (%s)\n", nalu_type, NALU_TYPE[nalu_type]); #endif - int poc = -1; + int poc = -1; int poc_lsb = -1; int new_poc = -1; @@ -307,28 +306,41 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int // beginning of picture header parsing parcatHLSReader.parsePictureHeaderUpToPoc(&parameterSetManager); int num_bits_up_to_poc_lsb = parcatHLSReader.getBitstream()->getNumBitsRead(); - int offset = num_bits_up_to_poc_lsb; + int num_emul_prev_code_before_poc = 0; + for (int i=0; i<parcatHLSReader.getBitstream()->numEmulationPreventionBytesRead(); i++) + { + if (8*parcatHLSReader.getBitstream()->getEmulationPreventionByteLocation(i) <= num_bits_up_to_poc_lsb) + num_emul_prev_code_before_poc++; + } + int offset = num_bits_up_to_poc_lsb + (num_emul_prev_code_before_poc << 3); int byte_offset = offset / 8; - int hi_bits = offset % 8; - uint16_t data = 
(nalu[byte_offset] << 8) | nalu[byte_offset + 1]; - int low_bits = 16 - hi_bits - bits_for_poc; + int hi_bits = offset % 8; + uint16_t data = (nalu[byte_offset] << 8) | nalu[byte_offset + 1]; + int low_bits = 16 - hi_bits - bits_for_poc; poc_lsb = (data >> low_bits) & 0xff; - poc = poc_lsb; //calc_poc(poc_lsb, 0, bits_for_poc, nalu_type); + poc = poc_lsb; //calc_poc(poc_lsb, 0, bits_for_poc, nalu_type); new_poc = poc + *poc_base; // int picOrderCntLSB = (pcSlice->getPOC()-pcSlice->getLastIDR()+(1<<pcSlice->getSPS()->getBitsForPOC())) & ((1<<pcSlice->getSPS()->getBitsForPOC())-1); unsigned picOrderCntLSB = (new_poc - *last_idr_poc + (1 << bits_for_poc)) & ((1 << bits_for_poc) - 1); int low = data & ((1 << low_bits) - 1); - int hi = data >> (16 - hi_bits); - data = (hi << (16 - hi_bits)) | (picOrderCntLSB << low_bits) | low; + int hi = data >> (16 - hi_bits); + data = (hi << (16 - hi_bits)) | (picOrderCntLSB << low_bits) | low; nalu[byte_offset] = data >> 8; nalu[byte_offset + 1] = data & 0xff; #if ENABLE_TRACING - std::cout << "Changed poc " << poc << " to " << new_poc << std::endl; + std::cout << "Changed poc " << poc << " to " << new_poc << " at offset " << offset << " bits"; + if (num_emul_prev_code_before_poc) + { + std::cout << " with " << num_emul_prev_code_before_poc << " emulation prevention code at byte pos "; + for (int i=0; i<num_emul_prev_code_before_poc; i++) + std::cout << parcatHLSReader.getBitstream()->getEmulationPreventionByteLocation(i) << " "; + } + std::cout << std::endl; #endif ++cnt; change_poc = false; @@ -340,7 +352,8 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int skip_next_sei = true; idr_found = true; } - if ((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)) || ((idx > 1 && !idr_found) && (nalu_type == NAL_UNIT_DCI || nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_PREFIX_APS || nalu_type 
== NAL_UNIT_SUFFIX_APS || nalu_type == NAL_UNIT_PH || nalu_type == NAL_UNIT_ACCESS_UNIT_DELIMITER)) + if ((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)) + || ((idx > 1 && !idr_found) && (nalu_type == NAL_UNIT_DCI || nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_PREFIX_APS || nalu_type == NAL_UNIT_SUFFIX_APS || nalu_type == NAL_UNIT_PH || nalu_type == NAL_UNIT_ACCESS_UNIT_DELIMITER)) || (nalu_type == NAL_UNIT_SUFFIX_SEI && skip_next_sei)) { } @@ -355,8 +368,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int skip_next_sei = false; } - - p += (nal_end - nal_start); + p += (nal_end - nal_start); sz -= nal_end; } @@ -408,12 +420,12 @@ int main(int argc, char * argv[]) } FILE * fdo = fopen(argv[argc - 1], "wb"); - if (fdo==NULL) + if (fdo == NULL) { fprintf(stderr, "Error: could not open output file: %s", argv[argc - 1]); exit(1); } - int poc_base = 0; + int poc_base = 0; int last_idr_poc = 0; initROM(); diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index bcb690f4cc368cb182653964b15c00c49b0e287b..77b337940d2250c8f59061bf36275758c294ed6d 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -477,6 +477,14 @@ static const int ALF_VB_POS_ABOVE_CTUROW_CHMA = 2; #if W0038_DB_OPT static const int MAX_ENCODER_DEBLOCKING_QUALITY_LAYERS = 8 ; #endif +#if IDCC_TPM_JEM && !IDCC_FixedComparisonPerPixel +static const int SEARCHRANGEINTRA = IDCC_SEARCHRANGEINTRA; ///< Intra search range (-SEARCHRANGE,+SEARCHRANGE) +#endif + +#if IDCC_TPM_JEM +static const int USE_MORE_BLOCKSIZE_DEPTH_MAX = IDCC_TMP_MaxSize_Depth - 1; +static const int INIT_THRESHOULD_SHIFTBITS = 2; ///< (default 2) Early skip threshold for checking distance. 
+#endif #if SHARP_LUMA_DELTA_QP static const uint32_t LUMA_LEVEL_TO_DQP_LUT_MAXSIZE = 1024; ///< max LUT size for QP offset based on luma diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp index 803d7266d331157498a4f7e4edd92e706b55f3e8..7855d636f03d74734a51f7f92acb5460e63cabfc 100644 --- a/source/Lib/CommonLib/ContextModelling.cpp +++ b/source/Lib/CommonLib/ContextModelling.cpp @@ -778,6 +778,24 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) PU::restrictBiPredMergeCandsOne(pu); } +#if IDCC_TPM_JEM +unsigned DeriveCtx::CtxTmpFlag(const CodingUnit& cu) +{ + const CodingStructure* cs = cu.cs; + unsigned ctxId = 0; + + const CodingUnit* cuLeft = cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L); + ctxId = (cuLeft && cuLeft->TmpFlag) ? 1 : 0; + + const CodingUnit* cuAbove = cs->getCURestricted(cu.lumaPos().offset(0, -1), cu, CH_L); + ctxId += (cuAbove && cuAbove->TmpFlag) ? 1 : 0; + + ctxId = (cu.lwidth() > 2 * cu.lheight() || cu.lheight() > 2 * cu.lwidth()) ? 
3 : ctxId; + + return ctxId; +} +#endif + unsigned DeriveCtx::CtxMipFlag( const CodingUnit& cu ) { const CodingStructure *cs = cu.cs; diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index 5f26cface3f1284188fa5a79e9e8efad9274cba0..6e3ad4616ecfce51ec1eacf7418eb2608fce7406 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -611,6 +611,9 @@ unsigned CtxAffineFlag( const CodingUnit& cu ); unsigned CtxPredModeFlag( const CodingUnit& cu ); unsigned CtxIBCFlag(const CodingUnit& cu); unsigned CtxMipFlag ( const CodingUnit& cu ); +#if IDCC_TPM_JEM +unsigned CtxTmpFlag(const CodingUnit& cu); +#endif unsigned CtxPltCopyFlag( const unsigned prevRunType, const unsigned dist ); #if ENABLE_DIMD unsigned CtxDIMDFlag(const CodingUnit& cu); diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index 62e393cbc28c505f0b5c0875757e43e728383c33..db49d40b90b8583e77883a1d410ba6feddce1652 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -1000,6 +1000,17 @@ const CtxSet ContextSetCfg::MipFlag = ContextSetCfg::addCtxSet { 9, 9, 8, 6 }, { 10, 10, 9, 6 } }); +#if IDCC_TPM_JEM +const CtxSet ContextSetCfg::TmpFlag = ContextSetCfg::addCtxSet +({ + { CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, }, + { DWS, DWS, DWS, DWS, }, + { DWS, DWS, DWS, DWS, }, + { DWS, DWS, DWS, DWS, }, + }); +#endif #if MMLM const CtxSet ContextSetCfg::MMLMFlag = ContextSetCfg::addCtxSet @@ -2078,6 +2089,16 @@ const CtxSet ContextSetCfg::MipFlag = ContextSetCfg::addCtxSet { 33, 49, 50, 25, }, { 9, 10, 9, 6, }, }); +#if IDCC_TPM_JEM +const CtxSet ContextSetCfg::TmpFlag = ContextSetCfg::addCtxSet +({ + { CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, }, + { DWS, DWS, DWS, DWS, }, + }); +#endif + #if MMLM const CtxSet ContextSetCfg::MMLMFlag = ContextSetCfg::addCtxSet diff --git 
a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index c00f64f86d104c13a8549cf0d7bc7b344dc8f8a9..41b9f313e1d7fa7bb0e2a98ac730fd0b5fd44f1c 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -245,6 +245,9 @@ public: static const CtxSet CclmModeIdx; static const CtxSet IntraChromaPredMode; static const CtxSet MipFlag; +#if IDCC_TPM_JEM + static const CtxSet TmpFlag; +#endif #if MMLM static const CtxSet MMLMFlag; #endif diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 621d983f2f27720ab7eee8823a55c923e0bb574e..ea5eff9a761587742da206c07731ba1a8c8cc9b3 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -679,6 +679,9 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA if( sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag() || !isLuma( chType ) || useISP +#if IDCC_TPM_JEM + || PU::isTmp(pu, chType) +#endif || PU::isMIP( pu, chType ) || m_ipaParam.multiRefIndex || DC_IDX == dirMode @@ -1384,7 +1387,53 @@ void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const Comp } } +#if IDCC_TPM_JEM +bool IntraPrediction::isRefTemplateAvailable(CodingUnit& cu, CompArea& area) +{ + const ChannelType chType = toChannelType(area.compID); + const CodingStructure& cs = *cu.cs; + const SPS& sps = *cs.sps; + const PreCalcValues& pcv = *cs.pcv; + + + const int tuWidth = area.width; + const int tuHeight = area.height; + const int predSize = m_topRefLength; + const int predHSize = m_leftRefLength; + //const int predStride = predSize; + + + const int unitWidth = pcv.minCUWidth >> getComponentScaleX(area.compID, sps.getChromaFormatIdc()); + const int unitHeight = pcv.minCUHeight >> getComponentScaleY(area.compID, sps.getChromaFormatIdc()); + + const int totalAboveUnits = (predSize + (unitWidth - 1)) / unitWidth; + const int totalLeftUnits = (predHSize + (unitHeight - 1)) / unitHeight; 
+ const int totalUnits = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left + const int numAboveUnits = std::max<int>(tuWidth / unitWidth, 1); + const int numLeftUnits = std::max<int>(tuHeight / unitHeight, 1); + const int numAboveRightUnits = totalAboveUnits - numAboveUnits; + const int numLeftBelowUnits = totalLeftUnits - numLeftUnits; + + if (numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0) + return false; + // ----- Step 1: analyze neighborhood ----- + const Position posLT = area; + //const Position posRT = area.topRight(); + //const Position posLB = area.bottomLeft(); + + bool neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1]; + //int numIntraNeighbor = 0; + + memset(neighborFlags, 0, totalUnits); + + //bool retVal = 1; + + return isAboveLeftAvailable(cu, chType, posLT) && isAboveAvailable(cu, chType, posLT, numAboveUnits, unitWidth, (neighborFlags + totalLeftUnits + 1)) && isLeftAvailable(cu, chType, posLT, numLeftUnits, unitHeight, (neighborFlags + totalLeftUnits - 1)); + + //return retVal; +} +#endif void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu ) { const ChannelType chType = toChannelType( area.compID ); diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index 87796b451d113951a92405c36b26859b1fffe8db..5d7ae3173634dd702b00ccf6248c3021e7e5f51e 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -144,6 +144,9 @@ protected: void xPredIntraBDPCM ( const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng ); Pel xGetPredValDc ( const CPelBuf &pSrc, const Size &dstSize ); +#if IDCC_TPM_JEM + bool isRefTemplateAvailable(CodingUnit& cu, CompArea& area); +#endif void xFillReferenceSamples ( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu ); void xFilterReferenceSamples(const 
Pel *refBufUnfiltered, Pel *refBufFiltered, const CompArea &area, const SPS &sps, diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index 96be6fd96a2556a72ea798e4a8b795ea838f67a7..6c40305b13876f969e58eef7eb492ca2cca28c00 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -3113,6 +3113,10 @@ SPS::SPS() #if ENABLE_DIMD , m_dimd ( false ) #endif +#if IDCC_TPM_JEM +, m_IntraTMP ( false ) +, m_IntraTMP_MaxSize ( 64 ) +#endif #if ENABLE_OBMC , m_OBMC ( false ) #endif diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 41ffe7e35bcfca51949725357ad24990f6e93492..fde7178a8208baf651a05f9c407ca10ceb78e228 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -1651,6 +1651,10 @@ private: #if ENABLE_DIMD bool m_dimd; #endif +#if IDCC_TPM_JEM + bool m_IntraTMP; ///< intra Template Matching + unsigned m_IntraTMP_MaxSize; ///< max CU size for which intra TMP is allowed +#endif #if ENABLE_OBMC bool m_OBMC; #endif @@ -2073,6 +2077,12 @@ void setCCALFEnabledFlag( bool b ) void setUseDimd ( bool b ) { m_dimd = b; } bool getUseDimd () const { return m_dimd; } #endif +#if IDCC_TPM_JEM + void setUseIntraTMP(bool b) { m_IntraTMP = b; } + bool getUseIntraTMP() const { return m_IntraTMP; } + void setIntraTMPMaxSize(unsigned n) { m_IntraTMP_MaxSize = n; } + unsigned getIntraTMPMaxSize() const { return m_IntraTMP_MaxSize; } +#endif #if ENABLE_OBMC void setUseOBMC ( bool b ) { m_OBMC = b; } bool getUseOBMC () const { return m_OBMC; } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index d67d07aa2477be9d02a7689b4715d82b2679fe5c..0ad97c3e3efc6fbceafb7aa6d143713650e4774a 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -55,6 +55,16 @@ #include "CommonLib/CodingStatistics.h" #endif +#if IDCC_TMP_SIMD +#include "CommonDefX86.h" +#endif + +#if IDCC_TPM_JEM + +unsigned int g_uiDepth2Width[5] = { 4, 8, 16, 32, 64 }; 
+#endif + + struct coeffGroupRDStats { int iNNZbeforePos0; @@ -187,6 +197,9 @@ TrQuant::TrQuant() : m_quant( nullptr ) m_fwdICT[-2] = fwdTransformCbCr<-2>; m_fwdICT[ 3] = fwdTransformCbCr< 3>; m_fwdICT[-3] = fwdTransformCbCr<-3>; +#if IDCC_TPM_JEM + m_pppTarPatch = NULL; +#endif } } @@ -197,6 +210,33 @@ TrQuant::~TrQuant() delete m_quant; m_quant = nullptr; } +#if IDCC_TPM_JEM +#endif + +#if IDCC_TPM_JEM + if (m_pppTarPatch != NULL) + { + for (unsigned int uiDepth = 0; uiDepth < USE_MORE_BLOCKSIZE_DEPTH_MAX; uiDepth++) + { + unsigned int blkSize = g_uiDepth2Width[uiDepth]; + + unsigned int patchSize = blkSize + IDCC_TemplateSize; + for (unsigned int uiRow = 0; uiRow < patchSize; uiRow++) + { + if (m_pppTarPatch[uiDepth][uiRow] != NULL) + { + delete[]m_pppTarPatch[uiDepth][uiRow]; m_pppTarPatch[uiDepth][uiRow] = NULL; + } + } + if (m_pppTarPatch[uiDepth] != NULL) + { + delete[]m_pppTarPatch[uiDepth]; m_pppTarPatch[uiDepth] = NULL; + } + } + delete[] m_pppTarPatch; + m_pppTarPatch = NULL; + } +#endif } #if ENABLE_SPLIT_PARALLELISM @@ -234,6 +274,27 @@ void TrQuant::init( const Quant* otherQuant, m_quant->init( uiMaxTrSize, bUseRDOQ, bUseRDOQTS, useSelectiveRDOQ ); } + +#if IDCC_TPM_JEM + unsigned int blkSize; + + if (m_pppTarPatch == NULL) + { + m_pppTarPatch = new Pel * *[USE_MORE_BLOCKSIZE_DEPTH_MAX]; + for (unsigned int uiDepth = 0; uiDepth < USE_MORE_BLOCKSIZE_DEPTH_MAX; uiDepth++) + { + blkSize = g_uiDepth2Width[uiDepth]; + + unsigned int patchSize = blkSize + IDCC_TemplateSize; + m_pppTarPatch[uiDepth] = new Pel * [patchSize]; + for (unsigned int uiRow = 0; uiRow < patchSize; uiRow++) + { + m_pppTarPatch[uiDepth][uiRow] = new Pel[patchSize]; + } + } +} +#endif + #if TU_256 fastFwdTrans = { { @@ -374,6 +435,581 @@ void TrQuant::invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32 } } +#if IDCC_TPM_JEM +void insertNode(DistType diff, int& iXOffset, int& iYOffset, DistType& pDiff, int& pX, int& pY, short& pId, unsigned int& setId) +{ + pDiff = diff; 
+ pX = iXOffset; + pY = iYOffset; + pId = setId; +} +#if IDCC_TPM_JEM +#if IDCC_FixedComparisonPerPixel +void clipMvIntraConstraint(CodingUnit* pcCU, int regionId, int& iHorMin, int& iHorMax, int& iVerMin, int& iVerMax, unsigned int uiTemplateSize, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int iCurrY, int iCurrX, int offsetLCUY, int offsetLCUX) +#else +void clipMvIntraConstraint(CodingUnit* pcCU, int regionId, int& iHorMin, int& iHorMax, int& iVerMin, int& iVerMax, int iRange, unsigned int uiTemplateSize, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int iCurrY, int iCurrX, int offsetLCUY, int offsetLCUX) +#endif +{ +#if IDCC_FixedComparisonPerPixel + int SearchRange_Height, SearchRange_Width; + + SearchRange_Width = IDCC_SearchRangeMultFactor * uiBlkWidth; + SearchRange_Height = IDCC_SearchRangeMultFactor * uiBlkHeight; +#else + int SearchRange_Width = IDCC_SEARCHRANGEINTRA; + int SearchRange_Height = IDCC_SEARCHRANGEINTRA; +#endif + int iMvShift = 0; + int iTemplateSize = uiTemplateSize; + int iBlkWidth = uiBlkWidth; + int iBlkHeight = uiBlkHeight; + if (regionId == 0) //above outside LCU + { + iHorMax = std::min((iCurrX + SearchRange_Width) << iMvShift, (int)((pcCU->cs->sps->getMaxPicWidthInLumaSamples() - iBlkWidth) << iMvShift)); + iHorMin = std::max((iTemplateSize) << iMvShift, (iCurrX - SearchRange_Width) << iMvShift); + + iVerMax = (iCurrY - iBlkHeight - offsetLCUY) << iMvShift; + iVerMin = std::max(((iTemplateSize) << iMvShift), ((iCurrY - SearchRange_Height) << iMvShift)); + + iHorMin = iHorMin - iCurrX; + iHorMax = iHorMax - iCurrX; + iVerMax = iVerMax - iCurrY; + iVerMin = iVerMin - iCurrY; + } + else if (regionId == 1) //left outside LCU + { + iHorMax = (iCurrX - offsetLCUX - iBlkWidth) << iMvShift; + iHorMin = std::max((iTemplateSize) << iMvShift, (iCurrX - SearchRange_Width) << iMvShift); + + iVerMin = std::max((iTemplateSize) << iMvShift, (iCurrY - iBlkHeight - offsetLCUY) << iMvShift); + iVerMax = (iCurrY) << iMvShift; + + iHorMin = 
iHorMin - iCurrX; + iHorMax = iHorMax - iCurrX; + iVerMax = iVerMax - iCurrY; + iVerMin = iVerMin - iCurrY; + } + else if (regionId == 2) //left outside LCU (can reach the bottom row of LCU) + { + iHorMin = std::max((iTemplateSize) << iMvShift, (iCurrX - SearchRange_Width) << iMvShift); + iHorMax = (iCurrX - offsetLCUX - iBlkWidth) << iMvShift; + iVerMin = (iCurrY + 1) << iMvShift; + iVerMax = std::min(pcCU->cs->sps->getMaxPicHeightInLumaSamples() - iBlkHeight, (iCurrY - offsetLCUY + pcCU->cs->sps->getCTUSize() - iBlkHeight) << iMvShift); + + iHorMin = iHorMin - iCurrX; + iHorMax = iHorMax - iCurrX; + iVerMax = iVerMax - iCurrY; + iVerMin = iVerMin - iCurrY; + } +} +#endif +#endif + +#if IDCC_TPM_JEM +TempLibFast::TempLibFast() +{ +} + +TempLibFast::~TempLibFast() +{ +} +#endif + +#if IDCC_TPM_JEM +void TempLibFast::initTemplateDiff(unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int bitDepth) +{ +#if VCEG_AZ08_USE_SAD_DISTANCE + DistType maxValue = ((1 << bitDepth) >> (INIT_THRESHOULD_SHIFTBITS)) * (uiPatchHeight * uiPatchWidth - uiBlkHeight * uiBlkWidth); +#endif +#if VCEG_AZ08_USE_SSD_DISTANCE + DistType maxValue = ((1 << bitDepth) >> (INIT_THRESHOULD_SHIFTBITS)) * ((1 << bitDepth) >> (INIT_THRESHOULD_SHIFTBITS)) * (uiPatchSize * uiPatchSize - uiBlkSize * uiBlkSize); +#endif + m_diffMax = maxValue; + { + m_pDiff = maxValue; + } +} + +void TrQuant::getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight) +{ + const ComponentID compID = COMPONENT_Y; + unsigned int uiPatchWidth = uiBlkWidth + IDCC_TemplateSize; + unsigned int uiPatchHeight = uiBlkHeight + IDCC_TemplateSize; + unsigned int uiTarDepth = floorLog2(std::max(uiBlkHeight, uiBlkWidth)) - 2; + Pel** tarPatch = m_pppTarPatch[uiTarDepth]; + CompArea area = pcCU->blocks[compID]; + Pel* pCurrStart = pcCU->cs->picture->getRecoBuf(area).buf; + unsigned int uiPicStride = pcCU->cs->picture->getRecoBuf(compID).stride; + unsigned 
int uiY, uiX; + + + + //fill template + //up-left & up + Pel* tarTemp; + Pel* pCurrTemp = pCurrStart - IDCC_TemplateSize * uiPicStride - IDCC_TemplateSize; + for (uiY = 0; uiY < IDCC_TemplateSize; uiY++) + { + tarTemp = tarPatch[uiY]; + for (uiX = 0; uiX < uiPatchWidth; uiX++) + { + tarTemp[uiX] = pCurrTemp[uiX]; + } + pCurrTemp += uiPicStride; + } + //left + for (uiY = IDCC_TemplateSize; uiY < uiPatchHeight; uiY++) + { + tarTemp = tarPatch[uiY]; + for (uiX = 0; uiX < IDCC_TemplateSize; uiX++) + { + tarTemp[uiX] = pCurrTemp[uiX]; + } + pCurrTemp += uiPicStride; + } +} + +void TrQuant::candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight) +{ + const ComponentID compID = COMPONENT_Y; + const int channelBitDepth = pcCU->cs->sps->getBitDepth(toChannelType(compID)); + unsigned int uiPatchWidth = uiBlkWidth + IDCC_TemplateSize; + unsigned int uiPatchHeight = uiBlkHeight + IDCC_TemplateSize; + unsigned int uiTarDepth = floorLog2(std::max(uiBlkWidth, uiBlkHeight)) - 2; + Pel** tarPatch = getTargetPatch(uiTarDepth); + //Initialize the library for saving the best candidates + m_tempLibFast.initTemplateDiff(uiPatchWidth, uiPatchHeight, uiBlkWidth, uiBlkHeight, channelBitDepth); + short setId = 0; //record the reference picture. 
+ searchCandidateFromOnePicIntra(pcCU, tarPatch, uiPatchWidth, uiPatchHeight, setId); + //count collected candidate number + DistType pDiff = m_tempLibFast.getDiff(); + DistType maxDiff = m_tempLibFast.getDiffMax(); + + + if (pDiff < maxDiff) + m_uiVaildCandiNum = 1; + else + m_uiVaildCandiNum = 0; +} + +void TrQuant::searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId) +{ + const ComponentID compID = COMPONENT_Y; + unsigned int uiBlkWidth = uiPatchWidth - IDCC_TemplateSize; + unsigned int uiBlkHeight = uiPatchHeight - IDCC_TemplateSize; + + int pX = m_tempLibFast.getX(); + int pY = m_tempLibFast.getY(); + DistType pDiff = m_tempLibFast.getDiff(); + short pId = m_tempLibFast.getId(); + CompArea area = pcCU->blocks[compID]; + int refStride = pcCU->cs->picture->getRecoBuf(compID).stride; + + Pel* ref = pcCU->cs->picture->getRecoBuf(area).buf; + + setRefPicUsed(ref); //facilitate the access of each candidate point + + setStride(refStride); +#if !IDCC_FixedComparisonPerPixel + int iSrchRng = SEARCHRANGEINTRA; +#endif + + + Mv cTmpMvPred; + cTmpMvPred.setZero(); + + unsigned int uiCUPelY = area.pos().y; + unsigned int uiCUPelX = area.pos().x; + int blkX = 0; + int blkY = 0; + int iCurrY = uiCUPelY + blkY; + int iCurrX = uiCUPelX + blkX; + + Position ctuRsAddr = CU::getCtuXYAddr(*pcCU); + int offsetLCUY = iCurrY - ctuRsAddr.y; + int offsetLCUX = iCurrX - ctuRsAddr.x; + + + int iYOffset, iXOffset; + DistType diff; + Pel* refCurr; + +#if IDCC_SignleSearchRegion + int mvYMins; + int mvYMaxs; + int mvXMins; + int mvXMaxs; +#else +#define REGION_NUM 3 + int mvYMins[REGION_NUM]; + int mvYMaxs[REGION_NUM]; + int mvXMins[REGION_NUM]; + int mvXMaxs[REGION_NUM]; + int regionNum = REGION_NUM; + int regionId = 0; +#endif + +#if IDCC_TMP_Within_CTU && !IDCC_SignleSearchRegion + //1. 
check the near pixels within LCU + //above pixels in LCU + int iTemplateSize = IDCC_TemplateSize; + int iBlkWidth = uiBlkWidth; + int iBlkHeight = uiBlkHeight; + regionId = 0; + int iMvShift = 0; + + + int iVerMin = std::max(((iTemplateSize) << iMvShift), (iCurrY - offsetLCUY - iBlkHeight + 1) << iMvShift); + int iVerMax = (iCurrY - iBlkHeight) << iMvShift; + int iHorMin = std::max((iTemplateSize) << iMvShift, (iCurrX - offsetLCUX - iBlkWidth + 1) << iMvShift); + int iHorMax = (iCurrX - iBlkWidth); + + mvXMins[regionId] = iHorMin - iCurrX; + mvXMaxs[regionId] = iHorMax - iCurrX; + mvYMins[regionId] = iVerMin - iCurrY; + mvYMaxs[regionId] = iVerMax - iCurrY; + + + + //check within CTU pixels + for (regionId = 0; regionId < 1; regionId++) + { + int mvYMin = mvYMins[regionId]; + int mvYMax = mvYMaxs[regionId]; + int mvXMin = mvXMins[regionId]; + int mvXMax = mvXMaxs[regionId]; + if (mvYMax < mvYMin || mvXMax < mvXMin) + { + continue; + } + for (iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset--) + { + for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset--) + { + refCurr = ref + iYOffset * refStride + iXOffset; + diff = calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff); + if (diff < (pDiff)) + { + insertNode(diff, iXOffset, iYOffset, pDiff, pX, pY, pId, setId); + } + if (pDiff == 0) + { + regionId++; + } + } + } + } +#endif +#if IDCC_SignleSearchRegion + +#if IDCC_FixedComparisonPerPixel + int SearchRange_Height, SearchRange_Width; + // No. 
of comparison per pixel is: + // (searchRange_width - Width - TempSize_width + 1) *( searchRange_height - Height - TempSize + 1) / Width / Height + // to have a constant comparison per pixel: + // (searchRange_width - Width - TempSize_width + 1)/Width must be const = CC + // (searchRange_height - Height - TempSize + 1)/ Height must be constant = CC + + //searchRange_width = CC*Width + Width + TempSize_width - 1; + //searchRange_height = CC*Height + Height + TempSize_height - 1; + + + SearchRange_Width = IDCC_SearchRangeMultFactor * uiBlkWidth + uiBlkWidth + iTempSize - 1; + SearchRange_Height = IDCC_SearchRangeMultFactor * uiBlkHeight + uiBlkHeight + iTempSize - 1; +#endif + int iTempSize = uiTempSize; + + //int iMvShift = 0; + int iBlkWidth = uiBlkWidth; + int iBlkHeight = uiBlkHeight; + +#if IDCC_FixedComparisonPerPixel + mvYMins = std::max(iTempSize, iCurrY - SearchRange_Height); +#else + mvYMins = std::max(iTempSize, (iCurrY - iSrchRng)); +#endif + mvYMaxs = iCurrY - iBlkHeight; +#if IDCC_FixedComparisonPerPixel + mvXMins = std::max(iTempSize, (iCurrX - SearchRange_Width)); +#else + mvXMins = std::max(iTempSize, (iCurrX - iSrchRng)); +#endif + mvXMaxs = (iCurrX - iBlkWidth); + + mvXMins = mvXMins - iCurrX; + mvXMaxs = mvXMaxs - iCurrX; + mvYMaxs = mvYMaxs - iCurrY; + mvYMins = mvYMins - iCurrY; +#endif + +#if !IDCC_SignleSearchRegion + //2. 
check the pixels outside CTU + for (regionId = 0; regionId < regionNum; regionId++) + {// this function fills in the range the template matching for pixels outside the current CTU +#if IDCC_FixedComparisonPerPixel + clipMvIntraConstraint(pcCU, regionId, mvXMins[regionId], mvXMaxs[regionId], mvYMins[regionId], mvYMaxs[regionId], IDCC_TemplateSize, uiBlkWidth, uiBlkHeight, iCurrY, iCurrX, offsetLCUY, offsetLCUX); +#else + clipMvIntraConstraint(pcCU, regionId, mvXMins[regionId], mvXMaxs[regionId], mvYMins[regionId], mvYMaxs[regionId], iSrchRng, uiTempSize, uiBlkWidth, uiBlkHeight, iCurrY, iCurrX, offsetLCUY, offsetLCUX); +#endif + } +#endif +#if !IDCC_SignleSearchRegion + for (regionId = 0; regionId < regionNum; regionId++) +#endif + { +#if IDCC_SignleSearchRegion + int mvYMin = mvYMins; + int mvYMax = mvYMaxs; + int mvXMin = mvXMins; + int mvXMax = mvXMaxs; +#else + int mvYMin = mvYMins[regionId]; + int mvYMax = mvYMaxs[regionId]; + int mvXMin = mvXMins[regionId]; + int mvXMax = mvXMaxs[regionId]; +#endif + if ( mvYMax < mvYMin || mvXMax < mvXMin ) + { +#if IDCC_SignleSearchRegion + return; +#else + continue; +#endif + } + for (iYOffset = mvYMax; iYOffset >= mvYMin; iYOffset--) + { + for (iXOffset = mvXMax; iXOffset >= mvXMin; iXOffset--) + { + refCurr = ref + iYOffset * refStride + iXOffset; + diff = calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff); + if (diff < (pDiff)) + { + insertNode(diff, iXOffset, iYOffset, pDiff, pX, pY, pId, setId); + } + if (pDiff == 0) + { + regionId = regionNum; + } + } + } + } + m_tempLibFast.m_pX = pX; + m_tempLibFast.m_pY = pY; + m_tempLibFast.m_pDiff = pDiff; + m_tempLibFast.m_pId = pId; +} +bool TrQuant::generateTMPrediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int& foundCandiNum) +{ + bool bSucceedFlag = true; + unsigned int uiPatchWidth = uiBlkWidth + IDCC_TemplateSize; + unsigned int uiPatchHeight = uiBlkHeight + IDCC_TemplateSize; + + 
foundCandiNum = m_uiVaildCandiNum; + if (foundCandiNum < 1) + { + return false; + } + + int pX = m_tempLibFast.getX(); + int pY = m_tempLibFast.getY(); + Pel* ref; + int picStride = getStride(); + int iOffsetY, iOffsetX; + Pel* refTarget; + unsigned int uiHeight = uiPatchHeight - IDCC_TemplateSize; + unsigned int uiWidth = uiPatchWidth - IDCC_TemplateSize; + + //the data center: we use the prediction block as the center now. + //collect the candidates + ref = getRefPicUsed(); + { + iOffsetY = pY; + iOffsetX = pX; + refTarget = ref + iOffsetY * picStride + iOffsetX; + for (unsigned int uiY = 0; uiY < uiHeight; uiY++) + { + for (unsigned int uiX = 0; uiX < uiWidth; uiX++) + { + piPred[uiX] = refTarget[uiX]; + } + refTarget += picStride; + piPred += uiStride; + } + } + return bSucceedFlag; +} + +DistType TrQuant::calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, DistType iMax) +{ +#if IDCC_TMP_SIMD + DistType iDiffSum = 0; + int iY; + Pel* refPatchRow = ref - IDCC_TemplateSize * uiStride - IDCC_TemplateSize; + Pel* tarPatchRow; + + uint32_t uiSum; + // horizontal difference + for (iY = 0; iY < IDCC_TemplateSize; iY++) + { + tarPatchRow = tarPatch[iY]; + const short* pSrc1 = (const short*)tarPatchRow; + const short* pSrc2 = (const short*)refPatchRow; + + // SIMD difference + //int iRows = uiPatchHeight; + int iCols = uiPatchWidth; + if ((iCols & 7) == 0) + { + // Do with step of 8 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for (int iX = 0; iX < iCols; iX += 8) + { + __m128i vsrc1 = _mm_loadu_si128((const __m128i*)(&pSrc1[iX])); + __m128i vsrc2 = _mm_lddqu_si128((const __m128i*)(&pSrc2[iX])); + vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = 
_mm_add_epi32(vsum32, vsumtemp); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + uiSum = _mm_cvtsi128_si32(vsum32); + } + else + { + // Do with step of 4 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for (int iX = 0; iX < iCols; iX += 4) + { + __m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]); + __m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]); + vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + uiSum = _mm_cvtsi128_si32(vsum32); + } + iDiffSum += uiSum; + + if (iDiffSum > iMax) //for speeding up + { + return iDiffSum; + } + // update location + refPatchRow += uiStride; + } + + // vertical difference + int iCols = IDCC_TemplateSize; + for (iY = IDCC_TemplateSize; iY < uiPatchHeight; iY++) + { + tarPatchRow = tarPatch[iY]; + const short* pSrc1 = (const short*)tarPatchRow; + const short* pSrc2 = (const short*)refPatchRow ; + + // SIMD difference + + // Do with step of 4 + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + //for (int iY = 0; iY < iRows; iY += iSubStep) + { + __m128i vsum16 = vzero; + for (int iX = 0; iX < iCols; iX += 4) + { + __m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]); + __m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]); + vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2))); + } + __m128i vsumtemp = 
_mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + //pSrc1 += iStrideSrc1; + //pSrc2 += iStrideSrc2; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + uiSum = _mm_cvtsi128_si32(vsum32); + + iDiffSum += uiSum; + + if (iDiffSum > iMax) //for speeding up + { + return iDiffSum; + } + // update location + refPatchRow += uiStride; + } + + return iDiffSum; + +#else + int iY, iX; +#if VCEG_AZ08_USE_SSD_DISTANCE + int iDiff; +#endif + DistType iDiffSum = 0; + Pel* refPatchRow = ref - IDCC_TemplateSize * uiStride - IDCC_TemplateSize; + Pel* tarPatchRow; + for (iY = 0; iY < IDCC_TemplateSize; iY++) + { + tarPatchRow = tarPatch[iY]; + for (iX = 0; iX < uiPatchWidth; iX++) + { +#if VCEG_AZ08_USE_SAD_DISTANCE + iDiffSum += abs(refPatchRow[iX] - tarPatchRow[iX]); +#endif +#if VCEG_AZ08_USE_SSD_DISTANCE + iDiff = refPatchRow[iX] - tarPatchRow[iX]; + iDiffSum += iDiff * iDiff; +#endif + } + if (iDiffSum > iMax) //for speeding up + { + return iDiffSum; + } + refPatchRow += uiStride; + } + for (iY = IDCC_TemplateSize; iY < uiPatchHeight; iY++) + { + tarPatchRow = tarPatch[iY]; + for (iX = 0; iX < uiTempSize; iX++) + { +#if VCEG_AZ08_USE_SAD_DISTANCE + iDiffSum += abs(refPatchRow[iX] - tarPatchRow[iX]); +#endif +#if VCEG_AZ08_USE_SSD_DISTANCE + iDiff = refPatchRow[iX] - tarPatchRow[iX]; + iDiffSum += iDiff * iDiff; +#endif + } + if (iDiffSum > iMax) //for speeding up + { + return iDiffSum; + } + refPatchRow += uiStride; + } + return iDiffSum; +#endif +} +#endif + + + uint32_t TrQuant::getLFNSTIntraMode( int wideAngPredMode ) { uint32_t intraMode; @@ -427,6 +1063,12 @@ void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID ) { intraMode = PLANAR_IDX; } +#if IDCC_TPM_JEM + if (PU::isTmp(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID))) + { + intraMode = 
PLANAR_IDX; + } +#endif CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" ); #if EXTENDED_LFNST @@ -567,6 +1209,12 @@ void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, cons { intraMode = PLANAR_IDX; } +#if IDCC_TPM_JEM + if (PU::isTmp(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID))) + { + intraMode = PLANAR_IDX; + } +#endif CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" ); #if EXTENDED_LFNST @@ -787,7 +1435,11 @@ std::vector<int> TrQuant::selectICTCandidates( const TransformUnit &tu, CompStor void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer) { const bool isExplicitMTS = (CU::isIntra(*tu.cu) ? tu.cs->sps->getUseIntraMTS() : tu.cs->sps->getUseInterMTS() && CU::isInter(*tu.cu)) && isLuma(compID); +#if IDCC_TPM_JEM && !IDCC_TMP_ImplicitMTS + const bool isImplicitMTS = CU::isIntra(*tu.cu) && tu.cs->sps->getUseImplicitMTS() && isLuma(compID) && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0 && tu.cu->TmpFlag == 0; +#else const bool isImplicitMTS = CU::isIntra(*tu.cu) && tu.cs->sps->getUseImplicitMTS() && isLuma(compID) && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0; +#endif const bool isISP = CU::isIntra(*tu.cu) && tu.cu->ispMode && isLuma(compID); const bool isSBT = CU::isInter(*tu.cu) && tu.cu->sbtInfo && isLuma(compID); @@ -804,7 +1456,11 @@ void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int & return; } +#if IDCC_TMP_ImplicitMTS + if (isImplicitMTS || isISP || tu.cu->TmpFlag) +#else if (isImplicitMTS || isISP) +#endif { int width = tu.blocks[compID].width; int height = tu.blocks[compID].height; diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h index cc6e2c1d9f8498cac91408855e7eca33f290be79..f116d83e256f8ab45ccf75ef0296a4a6ca14bcf1 100644 --- a/source/Lib/CommonLib/TrQuant.h +++ b/source/Lib/CommonLib/TrQuant.h @@ -55,6 +55,51 @@ typedef void FwdTrans(const TCoeff*, TCoeff*, int, int, 
int, int); typedef void InvTrans(const TCoeff*, TCoeff*, int, int, int, int, const TCoeff, const TCoeff); + + +#if IDCC_TPM_JEM + + +#define MAX_1DTRANS_LEN (1 << (((USE_MORE_BLOCKSIZE_DEPTH_MAX) + 1) << 1)) ///< 4x4 = 16, 8x8 = 64, 16x16=256, 32x32 = 1024 +extern unsigned int g_uiDepth2Width[5]; +extern unsigned int g_uiDepth2MaxCandiNum[5]; + +class TempLibFast +{ +public: + int m_pX; //offset X + int m_pY; //offset Y + int m_pXInteger; //offset X for integer pixel search + int m_pYInteger; //offset Y for integer pixel search + DistType m_pDiffInteger; + int getXInteger() { return m_pXInteger; } + int getYInteger() { return m_pYInteger; } + DistType getDiffInteger() { return m_pDiffInteger; } + short m_pIdInteger; //frame id + short getIdInteger() { return m_pIdInteger; } + DistType m_pDiff; //mse + short m_pId; //frame id + + + TempLibFast(); + ~TempLibFast(); + //void init(); + int getX() { return m_pX; } + int getY() { return m_pY; } + DistType getDiff() { return m_pDiff; } + short getId() { return m_pId; } + /*void initDiff(unsigned int uiPatchSize, int bitDepth); + void initDiff(unsigned int uiPatchSize, int bitDepth, int iCandiNumber);*/ + void initTemplateDiff(unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int bitDepth); + int m_diffMax; + int getDiffMax() { return m_diffMax; } +}; + + +typedef short TrainDataType; +#endif + + // ==================================================================================================================== // Class definition // ==================================================================================================================== @@ -87,6 +132,19 @@ public: void fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ); void invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ); #endif +#if IDCC_TPM_JEM + DistType 
calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, DistType iMax); + Pel** getTargetPatch(unsigned int uiDepth) { return m_pppTarPatch[uiDepth]; } + Pel* getRefPicUsed() { return m_refPicUsed; } + void setRefPicUsed(Pel* ref) { m_refPicUsed = ref; } + unsigned int getStride() { return m_uiPicStride; } + void setStride(unsigned int uiPicStride) { m_uiPicStride = uiPicStride; } + + void searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId); + void candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight); + bool generateTMPrediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int& foundCandiNum); + void getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight); +#endif uint32_t getLFNSTIntraMode( int wideAngPredMode ); bool getTransposeFlag ( uint32_t intraMode ); @@ -141,6 +199,15 @@ public: protected: TCoeff m_tempCoeff[MAX_TB_SIZEY * MAX_TB_SIZEY]; +#if IDCC_TPM_JEM + int m_uiPartLibSize; + TempLibFast m_tempLibFast; + Pel* m_refPicUsed; + Picture* m_refPicBuf; + unsigned int m_uiPicStride; + unsigned int m_uiVaildCandiNum; + Pel*** m_pppTarPatch; +#endif #if SIGN_PREDICTION Pel m_tempSignPredResid[SIGN_PRED_MAX_BS * SIGN_PRED_MAX_BS * 2]{0}; Pel m_signPredTemplate[SIGN_PRED_FREQ_RANGE*SIGN_PRED_FREQ_RANGE*SIGN_PRED_MAX_BS*2]; diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 06a94c5765e059c9ddc7c349fd1aec6dd5b79db8..9527da1e4f7d6b50f8f23ef489913e4f650ca214 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -1,4 +1,4 @@ -/* The copyright in this software is being made available under the BSD +/* The copyright in this software is being made available under the BSD * License, included below. 
This software may be subject to other third party * and contributor rights, including patent rights, and no such rights are * granted under this license. @@ -50,6 +50,41 @@ #include <assert.h> #include <cassert> +#define IDCC_TPM_JEM 1 // template matching prediction as implemented in JEM-7.2 +#if IDCC_TPM_JEM + +#define IDCC_TMP_SIMD 1 +//#define IDCC_TMP_MaxSize 64 +#define IDCC_SignleSearchRegion 0 // single region starting from current position up-left on + +#define IDCC_FixedComparisonPerPixel 1 +#if IDCC_FixedComparisonPerPixel +#define IDCC_SearchRangeMultFactor 5 +#endif + +#if !IDCC_SignleSearchRegion +#define IDCC_TMP_Within_CTU 1 +#endif +#if !IDCC_FixedComparisonPerPixel +#define IDCC_SEARCHRANGEINTRA 70 // should be larger than IDCC_TMP_MaxSize + IDCC_TemplateSize +#endif +#if IDCC_TMP_SIMD +#define IDCC_TemplateSize 4 // must be multiple of 4 for SIMD +#else +#define IDCC_TemplateSize 4 +#endif +#define IDCC_TMP_ImplicitMTS 1 + +#define IDCC_TMP_MaxSize_Depth 6 // should be log2(IDCC_TMP_MaxSize): keep as 6 to avoid any error + +#define VCEG_AZ08_USE_SSD_DISTANCE 0 ///< (default 0) If defined, use SSD distance. +#define VCEG_AZ08_USE_SAD_DISTANCE 1 ///< (default 1) If defined, use SAD distance. 
+ +#if VCEG_AZ08_USE_SSD_DISTANCE || VCEG_AZ08_USE_SAD_DISTANCE +typedef int DistType; +#endif +#endif + // Run test with the following config file parameters: // // BIF : 1 diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 17937b452261e8ba85e87b3e1ca4b4015f930026..c281acf6ffe27554228e07829e54c7becf1f8a15 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -303,6 +303,9 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) smvdMode = other.smvdMode; ispMode = other.ispMode; mipFlag = other.mipFlag; +#if IDCC_TPM_JEM + TmpFlag = other.TmpFlag; +#endif #if INTER_LIC LICFlag = other.LICFlag; #endif @@ -384,6 +387,9 @@ void CodingUnit::initData() smvdMode = 0; ispMode = 0; mipFlag = false; +#if IDCC_TPM_JEM + TmpFlag = false; +#endif #if INTER_LIC LICFlag = false; #endif diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index a59bb6f63bf053df8d7013bbd2bc8f7b862d4348..e24b8c58bc8be25c0646fcbe72bf6af3eb96bd8a 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -334,6 +334,9 @@ struct CodingUnit : public UnitArea uint8_t BcwIdx; int8_t refIdxBi[2]; bool mipFlag; +#if IDCC_TPM_JEM + bool TmpFlag; +#endif #if INTER_LIC bool LICFlag; #endif diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 651de98a9ed1d305633c5d5dffa0d7dadad838ea..9f6b464441c3aac5f52ae109540b67491cf1415b 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -333,7 +333,12 @@ uint32_t CU::getCtuAddr( const CodingUnit &cu ) { return getCtuAddr( cu.blocks[cu.chType].lumaPos(), *cu.cs->pcv ); } - +#if IDCC_TPM_JEM +Position CU::getCtuXYAddr(const CodingUnit& cu) +{ + return Position((cu.blocks[cu.chType].lumaPos().x >> cu.cs->pcv->maxCUWidthLog2) << cu.cs->pcv->maxCUWidthLog2, (cu.blocks[cu.chType].lumaPos().y >> cu.cs->pcv->maxCUHeightLog2) << cu.cs->pcv->maxCUHeightLog2); +} +#endif int CU::predictQP( const 
CodingUnit& cu, const int prevQP ) { const CodingStructure &cs = *cu.cs; @@ -939,7 +944,12 @@ bool PU::isMIP(const PredictionUnit &pu, const ChannelType &chType) return isDMChromaMIP(pu) && (pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX); } } - +#if IDCC_TPM_JEM +bool PU::isTmp(const PredictionUnit& pu, const ChannelType& chType) +{ + return (chType == CHANNEL_TYPE_LUMA && pu.cu->TmpFlag); +} +#endif bool PU::isDMChromaMIP(const PredictionUnit &pu) { #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS @@ -951,7 +961,11 @@ bool PU::isDMChromaMIP(const PredictionUnit &pu) uint32_t PU::getIntraDirLuma( const PredictionUnit &pu ) { +#if IDCC_TPM_JEM + if (isMIP(pu) || isTmp(pu)) +#else if (isMIP(pu)) +#endif { return PLANAR_IDX; } @@ -4970,6 +4984,9 @@ bool CU::isMTSAllowed(const CodingUnit &cu, const ComponentID compID) mtsAllowed &= cuWidth <= maxSize && cuHeight <= maxSize; mtsAllowed &= !cu.ispMode; mtsAllowed &= !cu.sbtInfo; +#if IDCC_TMP_ImplicitMTS + mtsAllowed &= !cu.TmpFlag; +#endif mtsAllowed &= !(cu.bdpcmMode && cuWidth <= tsMaxSize && cuHeight <= tsMaxSize); return mtsAllowed; } @@ -5304,7 +5321,12 @@ bool allowLfnstWithMip(const Size& block) } return false; } - +#if IDCC_TPM_JEM +bool allowLfnstWithTpm() +{ + return true; +} +#endif #if INTER_LIC bool CU::isLICFlagPresent(const CodingUnit& cu) { diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index e85f668de33d8a2f7b87c23a681c3bcdf159fbca..a9c47df9814bcf6a67ce72f5f47f0f8dae446d50 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -70,6 +70,9 @@ namespace CU bool isSameSubPic (const CodingUnit &cu, const CodingUnit &cu2); bool isLastSubCUOfCtu (const CodingUnit &cu); uint32_t getCtuAddr (const CodingUnit &cu); +#if IDCC_TPM_JEM + Position getCtuXYAddr(const CodingUnit& cu); +#endif int predictQP (const CodingUnit& cu, const int prevQP ); uint32_t getNumPUs (const CodingUnit& cu); @@ -138,6 +141,9 @@ namespace PU int getIntraMPMs(const 
PredictionUnit &pu, unsigned *mpm, const ChannelType &channelType = CHANNEL_TYPE_LUMA); #endif bool isMIP (const PredictionUnit &pu, const ChannelType &chType = CHANNEL_TYPE_LUMA); +#if IDCC_TPM_JEM + bool isTmp(const PredictionUnit& pu, const ChannelType& chType = CHANNEL_TYPE_LUMA); +#endif bool isDMChromaMIP (const PredictionUnit &pu); uint32_t getIntraDirLuma (const PredictionUnit &pu); void getIntraChromaCandModes(const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE]); @@ -268,6 +274,9 @@ uint32_t getCtuAddr (const Position& pos, const PreCalcValues &pcv); int getNumModesMip (const Size& block); int getMipSizeId (const Size& block); bool allowLfnstWithMip(const Size& block); +#if IDCC_TPM_JEM +bool allowLfnstWithTpm(); +#endif template<typename T, size_t N> uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeList, static_vector<double, N>& candCostList diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.cpp b/source/Lib/CommonLib/dtrace_blockstatistics.cpp index 260c7cc03c7e61173ad82535c7ec61a761c3fe57..a5e071fbcb35f0572326c2367eaedf59a7de3cd4 100644 --- a/source/Lib/CommonLib/dtrace_blockstatistics.cpp +++ b/source/Lib/CommonLib/dtrace_blockstatistics.cpp @@ -884,6 +884,9 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea) if(chType == CHANNEL_TYPE_LUMA) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MIPFlag), cu.mipFlag); +#if IDCC_TPM_JEM + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TmpFlag), cu.TmpFlag); +#endif DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::ISPMode), cu.ispMode); } diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.h b/source/Lib/CommonLib/dtrace_blockstatistics.h index c3ef3fd6d3473ae3f8c69272baa900fc032013a1..a416227b6dbfe3abf96f6b3f34b7ae77b0fcd397 100644 --- a/source/Lib/CommonLib/dtrace_blockstatistics.h +++ 
b/source/Lib/CommonLib/dtrace_blockstatistics.h @@ -78,6 +78,9 @@ enum class BlockStatistic { Chroma_IntraMode, MultiRefIdx, MIPFlag, +#if IDCC_TPM_JEM + TmpFlag, +#endif ISPMode, // inter @@ -170,6 +173,9 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType { BlockStatistic::JointCbCr, std::tuple<std::string, BlockStatisticType, std::string>{"JointCbCr", BlockStatisticType::Flag, ""}}, { BlockStatistic::MIPFlag, std::tuple<std::string, BlockStatisticType, std::string>{"MIPFlag", BlockStatisticType::Flag, ""}}, +#if IDCC_TPM_JEM + { BlockStatistic::TmpFlag, std::tuple<std::string, BlockStatisticType, std::string>{"TmpFlag", BlockStatisticType::Flag, ""}}, +#endif { BlockStatistic::ISPMode, std::tuple<std::string, BlockStatisticType, std::string>{"ISPMode", BlockStatisticType::Integer, "[0, " + std::to_string(NUM_INTRA_SUBPARTITIONS_MODES) + "]"}}, { BlockStatistic::Depth, std::tuple<std::string, BlockStatisticType, std::string>{"Depth", BlockStatisticType::Integer, "[0, 7]"}}, { BlockStatistic::QT_Depth, std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth", BlockStatisticType::Integer, "[0, 7]"}}, diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index a5286cdebd4effbc775579307c7bbf88bde2edef..d6dc6a190cbda6fcec3f508bb1dae27364e653ab 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -1606,7 +1606,17 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) cu.firstPU->intraDir[0] = cu.bdpcmMode == 2? 
VER_IDX : HOR_IDX; return; } - +#if IDCC_TPM_JEM + int TMP_MaxSize=cu.cs->sps->getIntraTMPMaxSize(); + if (cu.lwidth() <= TMP_MaxSize && cu.lheight() <= TMP_MaxSize) + { + Tmp_Flag(cu); + if (cu.TmpFlag) + return; + } + else + cu.TmpFlag = 0; +#endif mip_flag(cu); if (cu.mipFlag) { @@ -3848,7 +3858,11 @@ void CABACReader::residual_lfnst_mode( CodingUnit& cu, CUCtx& cuCtx ) int chIdx = cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA ? 1 : 0; #endif if ((cu.ispMode && !CU::canUseLfnstWithISP(cu, cu.chType)) +#if IDCC_TPM_JEM + || (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && ((cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) || (cu.TmpFlag && !allowLfnstWithTpm()))) +#else || (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) +#endif #if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_CHROMA && std::min(cu.blocks[1].width, cu.blocks[1].height) < 4) #else @@ -4488,7 +4502,27 @@ unsigned CABACReader::code_unary_fixed( unsigned ctxId, unsigned unary_max, unsi } return idx; } +#if IDCC_TPM_JEM +void CABACReader::Tmp_Flag(CodingUnit& cu) +{ + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__OTHER); + if (!cu.Y().valid()) + { + return; + } + + if( !cu.cs->sps->getUseIntraTMP() ) + { + cu.TmpFlag = false; + return; + } + + unsigned ctxId = DeriveCtx::CtxTmpFlag(cu); + cu.TmpFlag = m_BinDecoder.decodeBin(Ctx::TmpFlag(ctxId)); + DTRACE(g_trace_ctx, D_SYNTAX, "Tmp_Flag() pos=(%d,%d) mode=%d\n", cu.lumaPos().x, cu.lumaPos().y, cu.TmpFlag ? 
1 : 0); +} +#endif void CABACReader::mip_flag( CodingUnit& cu ) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__OTHER ); diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index 31b0b3449563cbb80472acb93b74927fc422aa98..9697cde83b12b7624877c0f9527f1aa6a1d76f2c 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -107,6 +107,9 @@ public: void adaptive_color_transform(CodingUnit& cu); void sbt_mode ( CodingUnit& cu ); void end_of_ctu ( CodingUnit& cu, CUCtx& cuCtx ); +#if IDCC_TPM_JEM + void Tmp_Flag(CodingUnit& cu); +#endif void mip_flag ( CodingUnit& cu ); void mip_pred_modes ( CodingUnit& cu ); void mip_pred_mode ( PredictionUnit& pu ); diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 4a7817d835ea8c329826c0804d9e341f569eb4d1..b20d81b6efc70f8061829531de46d87bc5ba5fec 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -307,7 +307,19 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) } else { +#if IDCC_TPM_JEM + if (PU::isTmp(pu, chType)) + { + int foundCandiNum; + m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight()); + m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight()); + m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); + assert(foundCandiNum >= 1); + } + else if (PU::isMIP(pu, chType)) +#else if( PU::isMIP( pu, chType ) ) +#endif { m_pcIntraPred->initIntraMip( pu, area ); m_pcIntraPred->predIntraMip( compID, piPred, pu ); @@ -500,7 +512,19 @@ void DecCu::xIntraRecACTBlk(TransformUnit& tu) PelBuf piPred = cs.getPredBuf(area); m_pcIntraPred->initIntraPatternChType(*tu.cu, area); +#if IDCC_TPM_JEM + if (PU::isTmp(pu, chType)) + { + int foundCandiNum; + const unsigned int uiStride = cs.picture->getRecoBuf(COMPONENT_Y).stride; + m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight()); + 
m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight()); + m_pcTrQuant->generateTMPrediction(piPred.buf, uiStride, pu.lwidth(), pu.lheight(), foundCandiNum); + } + else if (PU::isMIP(pu, chType)) +#else if (PU::isMIP(pu, chType)) +#endif { m_pcIntraPred->initIntraMip(pu, area); m_pcIntraPred->predIntraMip(compID, piPred, pu); diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index d8d43d08c6f6a8801880616fddec6878bccdfdc1..ce6f1c1c7571f761ae5404c19374b3fa1dbdadbc 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -2259,6 +2259,13 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) READ_FLAG(uiCode, "sps_mip_enabled_flag"); pcSPS->setUseMIP( uiCode != 0 ); #if ENABLE_DIMD READ_FLAG(uiCode, "sps_dimd_enabled_flag"); pcSPS->setUseDimd(uiCode != 0); +#endif +#if IDCC_TPM_JEM + READ_FLAG(uiCode, "sps_intraTMP_enabled_flag"); pcSPS->setUseIntraTMP( uiCode != 0 ); + if(pcSPS->getUseIntraTMP()) + { + READ_UVLC(uiCode, "sps_log2_intra_tmp_max_size"); pcSPS->setIntraTMPMaxSize(1 << uiCode); + } #endif if( pcSPS->getChromaFormatIdc() != CHROMA_400) { diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index d0c2961f688d0838f1694f51a69dba42155114aa..0a40c34d59411eb269116eab45898cd9dc388234 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -1200,7 +1200,15 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) cu.firstPU->intraDir[0] = cu.bdpcmMode == 2? 
VER_IDX : HOR_IDX; return; } - +#if IDCC_TPM_JEM + int TMP_MaxSize=cu.cs->sps->getIntraTMPMaxSize(); + if (cu.lwidth() <= TMP_MaxSize && cu.lheight() <= TMP_MaxSize) + { + Tmp_Flag(cu); + if (cu.TmpFlag) + return; + } +#endif mip_flag(cu); if (cu.mipFlag) { @@ -1381,6 +1389,17 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) { if( pu.cu->bdpcmMode ) return; +#if IDCC_TPM_JEM + // check if sufficient search range is available + //bool bCheck = pu.cu-> + int TMP_MaxSize=pu.cu->cs->sps->getIntraTMPMaxSize(); + if (pu.cu->lwidth() <= TMP_MaxSize && pu.cu->lheight() <= TMP_MaxSize) + { + Tmp_Flag(*pu.cu); + if (pu.cu->TmpFlag) + return; + } +#endif mip_flag(*pu.cu); if (pu.cu->mipFlag) { @@ -3569,7 +3588,11 @@ void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx ) int chIdx = cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA ? 1 : 0; #endif if( ( cu.ispMode && !CU::canUseLfnstWithISP( cu, cu.chType ) ) || +#if IDCC_TPM_JEM + (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && ((cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) || (cu.TmpFlag && !allowLfnstWithTpm()))) || +#else (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) || +#endif #if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_CHROMA && std::min(cu.blocks[1].width, cu.blocks[1].height) < 4) #else @@ -4195,6 +4218,25 @@ void CABACWriter::code_unary_fixed( unsigned symbol, unsigned ctxId, unsigned un } } +#if IDCC_TPM_JEM +void CABACWriter::Tmp_Flag(const CodingUnit& cu) +{ + if (!cu.Y().valid()) + { + return; + } + + if( !cu.cs->sps->getUseIntraTMP() ) + { + return; + } + + unsigned ctxId = DeriveCtx::CtxTmpFlag(cu); + m_BinEncoder.encodeBin(cu.TmpFlag, Ctx::TmpFlag(ctxId)); + DTRACE(g_trace_ctx, D_SYNTAX, "Tmp_Flag() pos=(%d,%d) mode=%d\n", cu.lumaPos().x, cu.lumaPos().y, cu.TmpFlag ? 
1 : 0); +} +#endif + void CABACWriter::mip_flag( const CodingUnit& cu ) { #if ENABLE_DIMD diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index 9999cad3d326c406fbfacaa2379c8268f541019d..f5ab58565bba7ff45566ed42f8cd666de34826f7 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -116,6 +116,9 @@ public: void adaptive_color_transform(const CodingUnit& cu); void sbt_mode ( const CodingUnit& cu ); void end_of_ctu ( const CodingUnit& cu, CUCtx& cuCtx ); +#if IDCC_TPM_JEM + void Tmp_Flag(const CodingUnit& cu); +#endif void mip_flag ( const CodingUnit& cu ); void mip_pred_modes ( const CodingUnit& cu ); void mip_pred_mode ( const PredictionUnit& pu ); diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 2f642584d5875d0a5517728a6befe99b795affee..756a4aacdea73d238c2594453e96620aedff7a5f 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -806,6 +806,10 @@ protected: bool m_alf; ///< Adaptive Loop Filter +#if IDCC_TPM_JEM + bool m_IntraTMP; ///< intra Template Matching + unsigned m_IntraTMP_MaxSize; ///< max CU size for which intra TMP is allowed +#endif #if ERICSSON_BIF bool m_BIF; int m_BIFStrength; @@ -1283,6 +1287,12 @@ public: bool getUseWrapAround () const { return m_wrapAround; } void setWrapAroundOffset ( unsigned u ) { m_wrapAroundOffset = u; } unsigned getWrapAroundOffset () const { return m_wrapAroundOffset; } +#if IDCC_TPM_JEM + void setUseIntraTMP(bool b) { m_IntraTMP = b; } + bool getUseIntraTMP() { return m_IntraTMP; } + void setIntraTMPMaxSize(unsigned n) { m_IntraTMP_MaxSize = n; } + unsigned getIntraTMPMaxSize() { return m_IntraTMP_MaxSize; } +#endif #if ERICSSON_BIF void setUseBIF ( bool b ) { m_BIF = b; } bool getUseBIF () const { return m_BIF; } diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index ce35b27035c47511363293ccaf6b7450029e3fa4..107624a0110b106cab1cf1d1d4d2241eec4f030c 
100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -2008,6 +2008,9 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS m_modeCtrl->setISPMode(cu.ispMode); m_modeCtrl->setISPLfnstIdx(cu.lfnstIdx); m_modeCtrl->setMIPFlagISPPass(cu.mipFlag); +#if IDCC_TPM_JEM + m_modeCtrl->setTPMFlagISPPass(cu.TmpFlag); +#endif m_modeCtrl->setBestISPIntraModeRelCU(cu.ispMode ? PU::getFinalIntraMode(*cu.firstPU, CHANNEL_TYPE_LUMA) : UINT8_MAX); m_modeCtrl->setBestDCT2NonISPCostRelCU(m_modeCtrl->getMtsFirstPassNoIspCost()); } @@ -3878,6 +3881,9 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure cu.mmvdSkip = false; cu.skip = false; cu.mipFlag = false; +#if IDCC_TPM_JEM + cu.TmpFlag = false; +#endif cu.bdpcmMode = 0; PredictionUnit &pu = tempCS->addPU(cu, pm.chType); @@ -4103,6 +4109,9 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure cu.mmvdSkip = false; cu.skip = false; cu.mipFlag = false; +#if IDCC_TPM_JEM + cu.TmpFlag = false; +#endif cu.bdpcmMode = 0; PredictionUnit &pu = tempCS->addPU(cu, pm.chType); pu.mergeFlag = true; @@ -4799,6 +4808,9 @@ void EncCu::xCheckSATDCostGeoMerge(CodingStructure *&tempCS, CodingUnit &cu, Pre cu.mmvdSkip = false; cu.skip = false; cu.mipFlag = false; +#if IDCC_TPM_JEM + cu.TmpFlag = false; +#endif cu.bdpcmMode = 0; pu.mergeFlag = true; pu.regularMergeFlag = false; @@ -5783,7 +5795,10 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure * pu.bdmvrRefine = true; m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[(uiMergeCand << 1) + 1]); } - PU::spanMotionInfo(pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1]); + else + { + PU::spanMotionInfo(pu, mergeCtx); + } #else PU::spanMotionInfo(pu, mergeCtx); #endif @@ -5795,6 +5810,13 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure * 
m_pcInterSearch->motionCompensation(pu, acMergeRealBuffer[uiMergeCand], REF_PIC_LIST_X, true, true); +#if MULTI_PASS_DMVR + if( pu.bdmvrRefine ) + { + ::memcpy( m_mvBufEncBDOF4TM[uiMergeCand], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof( Mv ) * BDOF_SUBPU_MAX_NUM ); + PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] ); + } +#endif distParam.cur = acMergeRealBuffer[uiMergeCand].Y(); Distortion uiSad = distParam.distFunc(distParam); m_CABACEstimator->getCtx() = ctxStart; @@ -5874,7 +5896,10 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure * #endif } #if MULTI_PASS_DMVR - PU::spanMotionInfo(pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1]); + if (!pu.bdmvrRefine) + { + PU::spanMotionInfo(pu, mergeCtx); + } #else PU::spanMotionInfo(pu, mergeCtx); #endif @@ -5882,6 +5907,12 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure * if( mrgTempBufSet ) { tempCS->getPredBuf().copyFrom(acMergeRealBuffer[uiMergeCand]); +#if MULTI_PASS_DMVR + if( pu.bdmvrRefine ) + { + PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] ); + } +#endif } else { @@ -5890,6 +5921,13 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure * m_pcInterSearch->m_storeBeforeLIC = false; #endif m_pcInterSearch->motionCompensation( pu ); +#if MULTI_PASS_DMVR + if( pu.bdmvrRefine ) + { + ::memcpy( m_mvBufEncBDOF4TM[uiMergeCand], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof( Mv ) * BDOF_SUBPU_MAX_NUM ); + PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] ); + } +#endif } xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? 
&candHasNoResidual[uiMrgHADIdx] : NULL ); diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 46cb2b097d8c5f53e0cc301ae11d5aa27fb82f04..e2055c70e760d21fa206c1121c50816dcfffb6ad 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -1428,6 +1428,10 @@ void EncLib::xInitSPS( SPS& sps ) sps.setMaxNumAddHyps(m_maxNumAddHyps); sps.setNumAddHypWeights(m_numAddHypWeights); sps.setMaxNumAddHypRefFrames(m_maxNumAddHypRefFrames); +#endif +#if IDCC_TPM_JEM + sps.setUseIntraTMP(m_IntraTMP); + sps.setIntraTMPMaxSize(m_IntraTMP_MaxSize); #endif // ADD_NEW_TOOL : (encoder lib) set tool enabling flags and associated parameters here sps.setUseISP ( m_ISP ); diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index ee16ae06196168b494f386ad92a530550a2f41ca..67b0825473d1807c127628dfe14551828a02688c 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -2209,6 +2209,9 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt int bit4 = cuECtx.ispLfnstIdx == 2; int bit5 = cuECtx.mipFlag; int bit6 = cuECtx.bestCostIsp < cuECtx.bestNonDCT2Cost * 0.95; +#if IDCC_TPM_JEM + int bit7 = cuECtx.TmpFlag; +#endif int val = (bit0) | (bit1 << 1) | @@ -2217,6 +2220,9 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt (bit4 << 4) | (bit5 << 5) | (bit6 << 6) | +#if IDCC_TPM_JEM + (bit7 << 7) | +#endif ( cuECtx.bestPredModeDCT2 << 9 ); relatedCU.ispPredModeVal = val; relatedCU.bestDCT2NonISPCost = cuECtx.bestDCT2NonISPCost; diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index b55472cbd08c90acd5b93b78b5d519a0083239ad..5b62615ea0b94bb1cf4ee74eebe77560cf92ad82 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -238,6 +238,9 @@ struct ComprCUCtx ( MAX_DOUBLE ) , bestISPIntraMode ( UINT8_MAX ) +#if IDCC_TPM_JEM + , 
TmpFlag(false) +#endif , mipFlag ( false ) , ispMode ( NOT_INTRA_SUBPARTITIONS ) , ispLfnstIdx ( 0 ) @@ -283,6 +286,9 @@ struct ComprCUCtx double bestDCT2NonISPCost; double bestNonDCT2Cost; uint8_t bestISPIntraMode; +#if IDCC_TPM_JEM + bool TmpFlag; +#endif bool mipFlag; uint8_t ispMode; uint8_t ispLfnstIdx; @@ -390,6 +396,9 @@ public: void setBestNonDCT2Cost ( double val ) { m_ComprCUCtxList.back().bestNonDCT2Cost = val; } uint8_t getBestISPIntraModeRelCU () const { return m_ComprCUCtxList.back().bestISPIntraMode; } void setBestISPIntraModeRelCU ( uint8_t val ) { m_ComprCUCtxList.back().bestISPIntraMode = val; } +#if IDCC_TPM_JEM + void setTPMFlagISPPass(bool val) { m_ComprCUCtxList.back().TmpFlag = val; } +#endif void setMIPFlagISPPass ( bool val ) { m_ComprCUCtxList.back().mipFlag = val; } void setISPMode ( uint8_t val ) { m_ComprCUCtxList.back().ispMode = val; } void setISPLfnstIdx ( uint8_t val ) { m_ComprCUCtxList.back().ispLfnstIdx = val; } diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index 7429a84079b34d2743fe9dd75e8c0b824e1b15d8..a3ca8a54f75929355dd71784137ee03b11b79589 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -8710,7 +8710,8 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa } } #if WCG_EXT - if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))) + if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || ( + m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))) { const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] ); if (compID == COMPONENT_Y ) diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index ea77b2de720bac5fdf4ed442ac551681269bd88b..205d5618e35047c08e3c84838b334ee4c14db092 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp 
+++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -394,6 +394,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); const TempCtx ctxStartMipFlag ( m_CtxCache, SubCtx( Ctx::MipFlag, m_CABACEstimator->getCtx() ) ); +#if IDCC_TPM_JEM + const TempCtx ctxStartTpmFlag(m_CtxCache, SubCtx(Ctx::TmpFlag, m_CABACEstimator->getCtx())); +#endif const TempCtx ctxStartIspMode ( m_CtxCache, SubCtx( Ctx::ISPMode, m_CABACEstimator->getCtx() ) ); #if SECONDARY_MPM const TempCtx ctxStartMPMIdxFlag(m_CtxCache, SubCtx(Ctx::IntraLumaMPMIdx, m_CABACEstimator->getCtx())); @@ -491,6 +494,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c const bool mipAllowed = sps.getUseMIP() && isLuma(partitioner.chType) && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.firstPU->lumaSize())); const bool testMip = mipAllowed && !(cu.lwidth() > (8 * cu.lheight()) || cu.lheight() > (8 * cu.lwidth())); const bool supportedMipBlkSize = pu.lwidth() <= MIP_MAX_WIDTH && pu.lheight() <= MIP_MAX_HEIGHT; +#if IDCC_TPM_JEM + const bool tpmAllowed = sps.getUseIntraTMP() && isLuma(partitioner.chType) && ((cu.lfnstIdx == 0) || allowLfnstWithTpm()); + const bool testTpm = tpmAllowed && (cu.lwidth() <= sps.getIntraTMPMaxSize() && cu.lheight() <= sps.getIntraTMPMaxSize()); +#endif static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiRdModeList; @@ -566,10 +573,19 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c ? std::max(numModesForFullRD, floorLog2(std::min(pu.lwidth(), pu.lheight())) - 1) : numModesForFullRD; } +#if IDCC_TPM_JEM + if (testTpm) + numModesForFullRD += 1; // testing tpm + const int numHadCand = (testMip ? 2 : 1) * 3 + testTpm; +#else const int numHadCand = (testMip ? 
2 : 1) * 3; +#endif //*** Derive (regular) candidates using Hadamard cu.mipFlag = false; +#if IDCC_TPM_JEM + cu.TmpFlag = false; +#endif //===== init pattern for luma prediction ===== initIntraPatternChType(cu, pu.Y(), true); @@ -600,6 +616,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c minSadHad += std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad)); // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. +#if IDCC_TPM_JEM + m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag); +#endif m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag ); m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode ); #if SECONDARY_MPM @@ -674,6 +693,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been // pre-estimated. +#if IDCC_TPM_JEM + m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag); +#endif m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); #if SECONDARY_MPM @@ -739,6 +761,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad)); // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. 
+#if IDCC_TPM_JEM + m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag); +#endif m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); #if SECONDARY_MPM @@ -781,6 +806,48 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_dSavedHadListLFNST.resize(3); LFNSTSaveFlag = false; } +#if IDCC_TPM_JEM + // derive TPM candidate using hadamard + if (testTpm) + { + cu.TmpFlag = true; + cu.mipFlag = false; + pu.multiRefIdx = 0; + + + + int foundCandiNum = 0; + bool bsuccessfull = 0; + CodingUnit cu_cpy = cu; + + if (isRefTemplateAvailable(cu_cpy, cu_cpy.blocks[COMPONENT_Y])) + { + m_pcTrQuant->getTargetTemplate(&cu_cpy, pu.lwidth(), pu.lheight()); + m_pcTrQuant->candidateSearchIntra(&cu_cpy, pu.lwidth(), pu.lheight()); + bsuccessfull = m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); + } + if (bsuccessfull && foundCandiNum >= 1) + { + + Distortion minSadHad = + std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad)); + + m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag); + + uint64_t fracModeBits = xFracModeBitsIntra(pu, 0, CHANNEL_TYPE_LUMA); + + double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass; + DTRACE(g_trace_ctx, D_INTRA_COST, "IntraTPM: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, + 0); + + updateCandList(ModeInfo(0, 0, 0, NOT_INTRA_SUBPARTITIONS, 0, 1), cost, uiRdModeList, + CandCostList, numModesForFullRD); + updateCandList(ModeInfo(0, 0, 0, NOT_INTRA_SUBPARTITIONS, 0, 1), + 0.8 * double(minSadHad), uiHadModeList, CandHadList, numHadCand); + } + + } +#endif //*** Derive MIP candidates using Hadamard if (testMip && !supportedMipBlkSize) { @@ -799,6 +866,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c } else if (testMip) { +#if IDCC_TPM_JEM + cu.TmpFlag = 0; 
+#endif cu.mipFlag = true; pu.multiRefIdx = 0; @@ -1025,6 +1095,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c cs.interHad = 0; //===== reset context models ===== +#if IDCC_TPM_JEM + m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag); +#endif m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); #if SECONDARY_MPM @@ -1128,6 +1201,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c uiOrgMode.modeId = cu.dimdMode; cu.dimd = true; } +#endif +#if IDCC_TPM_JEM + cu.TmpFlag = uiOrgMode.tpmFlg; #endif cu.mipFlag = uiOrgMode.mipFlg; pu.mipTransposedFlag = uiOrgMode.mipTrFlg; @@ -1140,6 +1216,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported"); CHECK(cu.ispMode && pu.multiRefIdx, "Error: combination of ISP and MRL not supported"); CHECK(cu.ispMode&& cu.colorTransform, "Error: combination of ISP and ACT not supported"); +#if IDCC_TPM_JEM + CHECK(cu.mipFlag&& cu.TmpFlag, "Error: combination of MIP and TPM not supported"); + CHECK(cu.TmpFlag&& cu.ispMode, "Error: combination of TPM and ISP not supported"); + CHECK(cu.TmpFlag&& pu.multiRefIdx, "Error: combination of TPM and MRL not supported"); +#endif pu.intraDir[CHANNEL_TYPE_CHROMA] = cu.colorTransform ? 
DM_CHROMA_IDX : pu.intraDir[CHANNEL_TYPE_CHROMA]; @@ -1180,10 +1261,17 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c uiBestPUMode.ispMod, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst); } } - +#if IDCC_TPM_JEM + if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.TmpFlag && testISP) +#else if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP) +#endif { +#if IDCC_TPM_JEM + m_regIntraRDListWithCosts.push_back(ModeInfoWithCost(cu.mipFlag, pu.mipTransposedFlag, pu.multiRefIdx, cu.ispMode, uiOrgMode.modeId, cu.TmpFlag, csTemp->cost)); +#else m_regIntraRDListWithCosts.push_back( ModeInfoWithCost( cu.mipFlag, pu.mipTransposedFlag, pu.multiRefIdx, cu.ispMode, uiOrgMode.modeId, csTemp->cost ) ); +#endif } if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ) @@ -1198,10 +1286,15 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c { m_modeCostStore[lfnstIdx][mode] = tmpValidReturn ? csTemp->cost : (MAX_DOUBLE / 2.0); //(MAX_DOUBLE / 2.0) ?? 
} - +#if IDCC_TPM_JEM + DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x, + cu.blocks[0].y, (int)width, (int)height, csTemp->cost, uiOrgMode.modeId, uiOrgMode.ispMod, + pu.multiRefIdx, cu.TmpFlag, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag); +#else DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x, cu.blocks[0].y, (int) width, (int) height, csTemp->cost, uiOrgMode.modeId, uiOrgMode.ispMod, pu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag); +#endif if( tmpValidReturn ) { @@ -1298,6 +1391,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c if( validReturn ) { //=== update PU data ==== +#if IDCC_TPM_JEM + cu.TmpFlag = uiBestPUMode.tpmFlg; +#endif cu.mipFlag = uiBestPUMode.mipFlg; pu.mipTransposedFlag = uiBestPUMode.mipTrFlg; pu.multiRefIdx = uiBestPUMode.mRefId; @@ -3252,7 +3348,19 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp } else { +#if IDCC_TPM_JEM + if (PU::isTmp(pu, chType)) + { + int foundCandiNum; + m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight()); + m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight()); + m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum); + assert(foundCandiNum >= 1); + } + else if (PU::isMIP(pu, chType)) +#else if( PU::isMIP( pu, chType ) ) +#endif { initIntraMip( pu, area ); predIntraMip( compID, piPred, pu ); @@ -4474,7 +4582,20 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti PelBuf piResi = resiBuf.bufs[compID]; initIntraPatternChType(*tu.cu, area); +#if IDCC_TPM_JEM + if (PU::isTmp(pu, chType)) + { + int foundCandiNum; + m_pcTrQuant->getTargetTemplate(pu.cu, pu.lwidth(), pu.lheight()); + m_pcTrQuant->candidateSearchIntra(pu.cu, pu.lwidth(), pu.lheight()); + m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, 
pu.lwidth(), pu.lheight(), foundCandiNum); + assert(foundCandiNum >= 1); + + } + else if (PU::isMIP(pu, chType)) +#else if (PU::isMIP(pu, chType)) +#endif { initIntraMip(pu, area); predIntraMip(compID, piPred, pu); diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h index a2246e326242e99a484215b66d7b43ba496dbb36..b11a37a36218fd800ba8ecc0f1aeec9266a62c9d 100644 --- a/source/Lib/EncoderLib/IntraSearch.h +++ b/source/Lib/EncoderLib/IntraSearch.h @@ -221,17 +221,30 @@ private: int mRefId; // PU::multiRefIdx uint8_t ispMod; // CU::ispMode uint32_t modeId; // PU::intraDir[CHANNEL_TYPE_LUMA] - +#if IDCC_TPM_JEM + bool tpmFlg; // CU::TmpFlag +#endif +#if IDCC_TPM_JEM + ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0), tpmFlg(0) {} + ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, const bool tpmf = 0) : mipFlg(mipf), mipTrFlg(miptf), mRefId(mrid), ispMod(ispm), modeId(mode), tpmFlg(tpmf) {} + bool operator==(const ModeInfo cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && tpmFlg == cmp.tpmFlg); } +#else ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0) {} ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode) : mipFlg(mipf), mipTrFlg(miptf), mRefId(mrid), ispMod(ispm), modeId(mode) {} bool operator==(const ModeInfo cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId); } +#endif }; struct ModeInfoWithCost : public ModeInfo { double rdCost; ModeInfoWithCost() : ModeInfo(), rdCost(MAX_DOUBLE) {} +#if IDCC_TPM_JEM + ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, const bool tpmf, double cost) : 
ModeInfo(mipf, miptf, mrid, ispm, mode, tpmf), rdCost(cost) {} + bool operator==(const ModeInfoWithCost cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && tpmFlg == cmp.tpmFlg && rdCost == cmp.rdCost); } +#else ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, double cost) : ModeInfo(mipf, miptf, mrid, ispm, mode), rdCost(cost) {} bool operator==(const ModeInfoWithCost cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && rdCost == cmp.rdCost); } +#endif static bool compareModeInfoWithCost(ModeInfoWithCost a, ModeInfoWithCost b) { return a.rdCost < b.rdCost; } }; diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 08f2878c09d05e6b4a9816a1e4bb297af166ea1f..56c523b37f8b032a82eca66b49799432ac72fb89 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -132,6 +132,7 @@ void VLCWriter::xWriteUvlc ( uint32_t uiCode ) uint32_t uiLength = 1; uint32_t uiTemp = ++uiCode; + if(!uiTemp){std::cout << "integer overflow: uiCode=" << uiCode << std::endl;} CHECK( !uiTemp, "Integer overflow" ); while( 1 != uiTemp ) @@ -1383,6 +1384,14 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) #if ENABLE_DIMD WRITE_FLAG( pcSPS->getUseDimd() ? 1 : 0, "sps_dimd_enabled_flag"); #endif +#if IDCC_TPM_JEM + WRITE_FLAG( pcSPS->getUseIntraTMP() ? 1 : 0, "sps_intraTMP_enabled_flag"); + if(pcSPS->getUseIntraTMP()) + { + WRITE_UVLC(floorLog2(pcSPS->getIntraTMPMaxSize()), "sps_log2_intra_tmp_max_size"); + } +#endif + if( pcSPS->getChromaFormatIdc() != CHROMA_400) { WRITE_FLAG( pcSPS->getUseLMChroma() ? 1 : 0, "sps_cclm_enabled_flag");