diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index 1bd41905edc89da556daaa387d9d01f3b84b1857..edf20f570ce22ede7d9d7afe8e6678a3f709711e 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -1571,7 +1571,17 @@ const CtxSet ContextSetCfg::TransformSkipFlag = ContextSetCfg::addCtxSet { 1, 5 }, { 2, 5 } }); - +#if JVET_W0103_INTRA_MTS +const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet +({ + { 45, 35, 20, 45, }, + { 38, 35, 35, 38, }, + { 37, 28, 28, 37, }, + { 8, 10, 10, 8, }, + { 8, 10, 10, 8, }, + { 9, 10, 10, 8, } + }); +#else const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet ({ { 37, 25, 34, 40 }, @@ -1581,7 +1591,7 @@ const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet { 9, 0, 10, 8 }, { 9, 1, 9, 0 } }); - +#endif const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet ({ #if JVET_W0123_TIMD_FUSION @@ -2579,7 +2589,15 @@ const CtxSet ContextSetCfg::TransformSkipFlag = ContextSetCfg::addCtxSet { 25, 9, }, { 1, 1, }, }); - +#if JVET_W0103_INTRA_MTS +const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet +({ + { 45, 35, 20, 45, }, + { 38, 35, 35, 38, }, + { 37, 28, 28, 37, }, + { 8, 9, 9, 8, }, + }); +#else const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet ({ { 45, 25, 27, 0, }, @@ -2587,7 +2605,7 @@ const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet { 29, 0, 28, 0, }, { 8, 0, 9, 0, }, }); - +#endif const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet ({ #if JVET_W0123_TIMD_FUSION diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index fb4317e06b126a8385b25016ce3e79ed56104f23..1bf0b8ef990831d35f8f10115c7b540ef8313c31 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -3843,7 +3843,213 @@ TMatrixCoeff g_trCoreDCT2P256[256][256]; TMatrixCoeff g_trCoreDCT8P256[256][256]; TMatrixCoeff g_trCoreDST7P256[256][256]; #endif +#if JVET_W0103_INTRA_MTS +TMatrixCoeff g_aiTr2[NUM_TRANS_TYPE][2][2]; +TMatrixCoeff g_aiTr4[NUM_TRANS_TYPE][4][4]; +TMatrixCoeff g_aiTr8[NUM_TRANS_TYPE][8][8]; +TMatrixCoeff g_aiTr16[NUM_TRANS_TYPE][16][16]; +TMatrixCoeff g_aiTr32[NUM_TRANS_TYPE][32][32]; +TMatrixCoeff g_aiTr64[NUM_TRANS_TYPE][64][64]; +TMatrixCoeff g_aiTr128[NUM_TRANS_TYPE][128][128]; +TMatrixCoeff g_aiTr256[NUM_TRANS_TYPE][256][256]; +const uint8_t g_aucIpmToTrSet[16][36] = +{ + //0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 MIP + { 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 }, //4x4 + { 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9 }, //4x8 + {10,10,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,14 }, //4x16 + {15,15,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,18,18,18,19 }, //4x32 + {20,20,21,21,21,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,23,23,23,24 }, //8x4 + {25,25,26,26,26,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,29 }, //8x8 + {30,30,31,31,31,31,31,31,31,31,31,31,31,32,32,32,32,32,32,32,32,32,32,32,33,33,33,33,33,33,33,33,33,33,33,34 }, //8x16 + {35,35,36,36,36,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,37,37,37,38,38,38,38,38,38,38,38,38,38,38,39 }, //8x32 + {40,40,41,41,41,41,41,41,41,41,41,41,41,42,42,42,42,42,42,42,42,42,42,42,43,43,43,43,43,43,43,43,43,43,43,44 }, //16x4 + {45,45,46,46,46,46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,47,47,47,48,48,48,48,48,48,48,48,48,48,48,49 }, //16x8 + {50,50,51,51,51,51,51,51,51,51,51,51,51,52,52,52,52,52,52,52,52,52,52,52,53,53,53,53,53,53,53,53,53,53,53,54 }, //16x16 + {55,55,56,56,56,56,56,56,56,56,56,56,56,57,57,57,57,57,57,57,57,57,57,57,58,58,58,58,58,58,58,58,58,58,58,59 }, //16x32 + {60,60,61,61,61,61,61,61,61,61,61,61,61,62,62,62,62,62,62,62,62,62,62,62,63,63,63,63,63,63,63,63,63,63,63,64 }, //32x4 + {65,65,66,66,66,66,66,66,66,66,66,66,66,67,67,67,67,67,67,67,67,67,67,67,68,68,68,68,68,68,68,68,68,68,68,69 }, //32x8 + {70,70,71,71,71,71,71,71,71,71,71,71,71,72,72,72,72,72,72,72,72,72,72,72,73,73,73,73,73,73,73,73,73,73,73,74 }, //32x16 + {75,75,76,76,76,76,76,76,76,76,76,76,76,77,77,77,77,77,77,77,77,77,77,77,78,78,78,78,78,78,78,78,78,78,78,79 }, //32x32 +}; +const int8_t g_aiIdLut[3][3] = +{ + { 8, 6, 4 },{ 8, 8, 6 },{ 4, 2, -1 } +}; +const uint8_t g_aucTrIdxToTr[25][2] = +{ + { DCT8, DCT8 },{ DCT8, DST7 },{ DCT8, DCT5 },{ DCT8, DST4 }, {DCT8, DST1}, + { DST7, DCT8 },{ DST7, DST7 },{ DST7, DCT5 },{ DST7, DST4 }, {DST7, DST1}, + { DCT5, DCT8 },{ DCT5, DST7 },{ DCT5, DCT5 },{ DCT5, DST4 }, {DCT5, DST1}, + { DST4, DCT8 },{ DST4, DST7 },{ DST4, DCT5 },{ DST4, DST4 }, {DST4, DST1}, + { DST1, DCT8 },{ DST1, DST7 },{ DST1, DCT5 },{ DST1, DST4 }, {DST1, DST1}, +}; + +const uint8_t g_aucTrSet[80][4] = +{ +//T0:0, 1, 2, 3, +{ 17, 18, 23, 24}, +//T1:0, 1, 2, 3, +{ 3, 7, 18, 22}, +//T2:0, 1, 2, 3, +{ 2, 17, 18, 22}, +//T3:0, 1, 2, 3, +{ 3, 15, 17, 18}, +//T4:0, 1, 2, 3, +{ 3, 12, 18, 19}, +//T5:0, 1, 2, 3, +{ 12, 18, 19, 23}, +//T6:0, 1, 2, 3, +{ 2, 12, 17, 18}, +//T7:0, 1, 2, 3, +{ 2, 17, 18, 22}, +//T8:0, 1, 2, 3, +{ 2, 11, 17, 18}, +//T9:0, 1, 2, 3, +{ 12, 18, 19, 23}, +//T10:0, 1, 2, 3, +{ 12, 13, 16, 24}, +//T11:0, 1, 2, 3, +{ 2, 11, 16, 23}, +//T12:0, 1, 2, 3, +{ 2, 13, 17, 22}, +//T13:0, 1, 2, 3, +{ 2, 11, 17, 21}, +//T14:0, 1, 2, 3, +{ 13, 16, 19, 22}, +//T15:0, 1, 2, 3, +{ 7, 12, 13, 18}, +//T16:0, 1, 2, 3, +{ 1, 11, 12, 16}, +//T17:0, 1, 2, 3, +{ 3, 13, 17, 22}, +//T18:0, 1, 2, 3, +{ 1, 6, 12, 22}, +//T19:0, 1, 2, 3, +{ 12, 13, 15, 16}, +//T20:0, 1, 2, 3, +{ 18, 19, 23, 24}, +//T21:0, 1, 2, 3, +{ 2, 17, 18, 24}, +//T22:0, 1, 2, 3, +{ 3, 4, 17, 22}, +//T23:0, 1, 2, 3, +{ 12, 18, 19, 23}, +//T24:0, 1, 2, 3, +{ 12, 18, 19, 23}, +//T25:0, 1, 2, 3, +{ 6, 12, 18, 24}, +//T26:0, 1, 2, 3, +{ 2, 6, 12, 21}, +//T27:0, 1, 2, 3, +{ 1, 11, 17, 22}, +//T28:0, 1, 2, 3, +{ 3, 11, 16, 17}, +//T29:0, 1, 2, 3, +{ 8, 12, 19, 23}, +//T30:0, 1, 2, 3, +{ 7, 13, 16, 23}, +//T31:0, 1, 2, 3, +{ 1, 6, 11, 12}, +//T32:0, 1, 2, 3, +{ 1, 11, 17, 21}, +//T33:0, 1, 2, 3, +{ 6, 11, 17, 21}, +//T34:0, 1, 2, 3, +{ 8, 11, 14, 17}, +//T35:0, 1, 2, 3, +{ 6, 11, 12, 21}, +//T36:0, 1, 2, 3, +{ 1, 6, 11, 12}, +//T37:0, 1, 2, 3, +{ 2, 6, 11, 12}, +//T38:0, 1, 2, 3, +{ 1, 6, 11, 21}, +//T39:0, 1, 2, 3, +{ 7, 11, 12, 16}, +//T40:0, 1, 2, 3, +{ 8, 12, 19, 24}, +//T41:0, 1, 2, 3, +{ 1, 13, 18, 22}, +//T42:0, 1, 2, 3, +{ 2, 6, 17, 21}, +//T43:0, 1, 2, 3, +{ 11, 12, 16, 19}, +//T44:0, 1, 2, 3, +{ 8, 12, 17, 24}, +//T45:0, 1, 2, 3, +{ 6, 12, 19, 21}, +//T46:0, 1, 2, 3, +{ 6, 12, 13, 21}, +//T47:0, 1, 2, 3, +{ 2, 16, 17, 21}, +//T48:0, 1, 2, 3, +{ 6, 17, 19, 23}, +//T49:0, 1, 2, 3, +{ 6, 12, 14, 17}, +//T50:0, 1, 2, 3, +{ 6, 7, 11, 21}, +//T51:0, 1, 2, 3, +{ 1, 11, 12, 16}, +//T52:0, 1, 2, 3, +{ 1, 6, 11, 12}, +//T53:0, 1, 2, 3, +{ 6, 11, 12, 21}, +//T54:0, 1, 2, 3, +{ 7, 8, 9, 11}, +//T55:0, 1, 2, 3, +{ 6, 7, 11, 12}, +//T56:0, 1, 2, 3, +{ 6, 7, 11, 12}, +//T57:0, 1, 2, 3, +{ 1, 11, 12, 16}, +//T58:0, 1, 2, 3, +{ 6, 11, 17, 21}, +//T59:0, 1, 2, 3, +{ 6, 7, 11, 12}, +//T60:0, 1, 2, 3, +{ 12, 14, 18, 21}, +//T61:0, 1, 2, 3, +{ 1, 11, 16, 22}, +//T62:0, 1, 2, 3, +{ 1, 11, 16, 22}, +//T63:0, 1, 2, 3, +{ 7, 13, 15, 16}, +//T64:0, 1, 2, 3, +{ 1, 8, 12, 19}, +//T65:0, 1, 2, 3, +{ 6, 7, 9, 12}, +//T66:0, 1, 2, 3, +{ 2, 6, 12, 13}, +//T67:0, 1, 2, 3, +{ 1, 12, 16, 21}, +//T68:0, 1, 2, 3, +{ 7, 11, 16, 19}, +//T69:0, 1, 2, 3, +{ 7, 8, 11, 12}, +//T70:0, 1, 2, 3, +{ 6, 7, 11, 12}, +//T71:0, 1, 2, 3, +{ 6, 7, 11, 12}, +//T72:0, 1, 2, 3, +{ 1, 6, 11, 12}, +//T73:0, 1, 2, 3, +{ 6, 7, 11, 16}, +//T74:0, 1, 2, 3, +{ 6, 7, 11, 12}, +//T75:0, 1, 2, 3, +{ 6, 7, 11, 12}, +//T76:0, 1, 2, 3, +{ 6, 11, 12, 21}, +//T77:0, 1, 2, 3, +{ 1, 6, 11, 12}, +//T78:0, 1, 2, 3, +{ 6, 7, 11, 12}, +//T79:0, 1, 2, 3, +{ 6, 7, 11, 12}, +}; +#endif // initialize ROM variables void initROM() { @@ -4041,7 +4247,7 @@ void initROM() g_paletteQuant[qp] = (int)(threshQP*0.16 + 0.5); } -#if SIGN_PREDICTION +#if SIGN_PREDICTION && !JVET_W0103_INTRA_MTS memset(&g_resiBorderTemplate[0][0][0], 0, sizeof(g_resiBorderTemplate)); const int8_t *p_data = g_initRomSignPred; for( int log2Width = 0; log2Width < 6; ++log2Width) @@ -4094,6 +4300,72 @@ void initROM() } } #endif +#if JVET_W0103_INTRA_MTS +#if LMS_LINEAR_MODEL || TRANSFORM_SIMD_OPT || TU_256 + c = 2; +#else + int c = 2; + const double PI = 3.14159265358979323846; +#endif + + for (int i = 0; i < 8; i++) + { + const double s = sqrt((double)c) * (64 << COM16_C806_TRANS_PREC); + TMatrixCoeff *iT = NULL; + + switch (i) + { + case 0: iT = g_aiTr2[0][0]; break; + case 1: iT = g_aiTr4[0][0]; break; + case 2: iT = g_aiTr8[0][0]; break; + case 3: iT = g_aiTr16[0][0]; break; + case 4: iT = g_aiTr32[0][0]; break; + case 5: iT = g_aiTr64[0][0]; break; + case 6: iT = g_aiTr128[0][0]; break; + case 7: iT = g_aiTr256[0][0]; break; + case 8: exit(0); break; + } + + for (int k = 0; k < c; k++) + { + for (int n = 0; n < c; n++) + { + double w0, w1, v; + + // DCT-II + w0 = k == 0 ? sqrt(0.5) : 1; + v = cos(PI*(n + 0.5)*k / c) * w0 * sqrt(2.0 / c); + iT[DCT2*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5)); + + // DCT-V + w0 = (k == 0) ? sqrt(0.5) : 1.0; + w1 = (n == 0) ? sqrt(0.5) : 1.0; + v = cos(PI*n*k / (c - 0.5)) * w0 * w1 * sqrt(2.0 / (c - 0.5)); + iT[DCT5*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5)); + + // DCT-VIII + v = cos(PI*(k + 0.5)*(n + 0.5) / (c + 0.5)) * sqrt(2.0 / (c + 0.5)); + iT[DCT8*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5)); + + // DST-I + v = sin(PI*(n + 1)*(k + 1) / (c + 1)) * sqrt(2.0 / (c + 1)); + iT[DST1*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5)); + + // DST-VII + v = sin(PI*(k + 0.5)*(n + 1) / (c + 0.5)) * sqrt(2.0 / (c + 0.5)); + iT[DST7*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5)); + + // DST4 + v = sin(PI * (k + 0.5) * (n + 0.5) / c) * sqrt(2.0 / c); + iT[DST4 * c * c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5)); + + // ID + iT[IDTR * c * c + k * c + n] = (k == n) ? (short)(s + (s > 0 ? 0.5 : -0.5)) : 0; + } + } + c <<= 1; + } +#endif } void destroyROM() diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index b41f205ba94e38bffa8a28c550716edd2b20a4dd..b6f4d1faa7693a4397cb75fe0aa7948941d45e33 100644 --- a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -166,7 +166,21 @@ extern TMatrixCoeff g_trCoreDCT2P256[256][256]; extern TMatrixCoeff g_trCoreDCT8P256[256][256]; extern TMatrixCoeff g_trCoreDST7P256[256][256]; #endif - +#if JVET_W0103_INTRA_MTS +extern TMatrixCoeff g_aiTr2[NUM_TRANS_TYPE][2][2]; +extern TMatrixCoeff g_aiTr4[NUM_TRANS_TYPE][4][4]; +extern TMatrixCoeff g_aiTr8[NUM_TRANS_TYPE][8][8]; +extern TMatrixCoeff g_aiTr16[NUM_TRANS_TYPE][16][16]; +extern TMatrixCoeff g_aiTr32[NUM_TRANS_TYPE][32][32]; +extern TMatrixCoeff g_aiTr64[NUM_TRANS_TYPE][64][64]; +extern TMatrixCoeff g_aiTr128[NUM_TRANS_TYPE][128][128]; +extern TMatrixCoeff g_aiTr256[NUM_TRANS_TYPE][256][256]; + +extern const uint8_t g_aucIpmToTrSet[16][36]; +extern const uint8_t g_aucTrSet[80][4]; +extern const int8_t g_aiIdLut[3][3]; +extern const uint8_t g_aucTrIdxToTr[25][2]; +#endif #if EXTENDED_LFNST extern const int8_t g_lfnst8x8[ 35 ][ 3 ][ 64 ][ 64 ]; extern const int8_t g_lfnst4x4[ 35 ][ 3 ][ 16 ][ 16 ]; diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index 26fbda3252bb36edb6394600b284812be9f215d9..4b7586705aa37c25ec7c5ae35142ce09271e4140 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -297,6 +297,12 @@ void TrQuant::init( const Quant* otherQuant, { fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, fastForwardDCT2_B64, fastForwardDCT2_B128, fastForwardDCT2_B256 }, { nullptr, fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, fastForwardDCT8_B64, fastForwardDCT8_B128, fastForwardDCT8_B256 }, { nullptr, fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, fastForwardDST7_B64, fastForwardDST7_B128, fastForwardDST7_B256 }, +#if JVET_W0103_INTRA_MTS + { nullptr, fastForwardDCT5_B4, fastForwardDCT5_B8, fastForwardDCT5_B16, fastForwardDCT5_B32, fastForwardDCT5_B64, fastForwardDCT5_B128, fastForwardDCT5_B256 }, + { nullptr, fastForwardDST4_B4, fastForwardDST4_B8, fastForwardDST4_B16, fastForwardDST4_B32, fastForwardDST4_B64, fastForwardDST4_B128, fastForwardDST4_B256 }, + { nullptr, fastForwardDST1_B4, fastForwardDST1_B8, fastForwardDST1_B16, fastForwardDST1_B32, fastForwardDST1_B64, fastForwardDST1_B128, fastForwardDST1_B256 }, + { nullptr, fastForwardIDTR_B4, fastForwardIDTR_B8, fastForwardIDTR_B16, fastForwardIDTR_B32, fastForwardIDTR_B64, fastForwardIDTR_B128, fastForwardIDTR_B256 }, +#endif } }; fastInvTrans = @@ -304,6 +310,12 @@ void TrQuant::init( const Quant* otherQuant, { fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, fastInverseDCT2_B64, fastInverseDCT2_B128, fastInverseDCT2_B256 }, { nullptr, fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, fastInverseDCT8_B64, fastInverseDCT8_B128, fastInverseDCT8_B256 }, { nullptr, fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, fastInverseDST7_B64, fastInverseDST7_B128, fastInverseDST7_B256 }, +#if JVET_W0103_INTRA_MTS + { nullptr, fastInverseDCT5_B4, fastInverseDCT5_B8, fastInverseDCT5_B16, fastInverseDCT5_B32, fastInverseDCT5_B64, fastInverseDCT5_B128, fastInverseDCT5_B256 }, + { nullptr, fastInverseDST4_B4, fastInverseDST4_B8, fastInverseDST4_B16, fastInverseDST4_B32, fastInverseDST4_B64, fastInverseDST4_B128, fastInverseDST4_B256 }, + { nullptr, fastInverseDST1_B4, fastInverseDST1_B8, fastInverseDST1_B16, fastInverseDST1_B32, fastInverseDST1_B64, fastInverseDST1_B128, fastInverseDST1_B256 }, + { nullptr, fastInverseIDTR_B4, fastInverseIDTR_B8, fastInverseIDTR_B16, fastInverseIDTR_B32, fastInverseIDTR_B64, fastInverseIDTR_B128, fastInverseIDTR_B256 }, +#endif } }; #else fastFwdTrans = @@ -311,6 +323,12 @@ void TrQuant::init( const Quant* otherQuant, { fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, fastForwardDCT2_B64 }, { nullptr, fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, nullptr }, { nullptr, fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, nullptr }, +#if JVET_W0103_INTRA_MTS + { nullptr, fastForwardDCT5_B4, fastForwardDCT5_B8, fastForwardDCT5_B16, fastForwardDCT5_B32, nullptr }, + { nullptr, fastForwardDST4_B4, fastForwardDST4_B8, fastForwardDST4_B16, fastForwardDST4_B32, nullptr }, + { nullptr, fastForwardDST1_B4, fastForwardDST1_B8, fastForwardDST1_B16, fastForwardDST1_B32, nullptr }, + { nullptr, fastForwardIDTR_B4, fastForwardIDTR_B8, fastForwardIDTR_B16, fastForwardIDTR_B32, nullptr }, +#endif } }; fastInvTrans = @@ -318,6 +336,12 @@ void TrQuant::init( const Quant* otherQuant, { fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, fastInverseDCT2_B64 }, { nullptr, fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, nullptr }, { nullptr, fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, nullptr }, +#if JVET_W0103_INTRA_MTS + { nullptr, fastInverseDCT5_B4, fastInverseDCT5_B8, fastInverseDCT5_B16, fastInverseDCT5_B32, nullptr }, + { nullptr, fastInverseDST4_B4, fastInverseDST4_B8, fastInverseDST4_B16, fastInverseDST4_B32, nullptr }, + { nullptr, fastInverseDST1_B4, fastInverseDST1_B8, fastInverseDST1_B16, fastInverseDST1_B32, nullptr }, + { nullptr, fastInverseIDTR_B4, fastInverseIDTR_B8, fastInverseIDTR_B16, fastInverseIDTR_B32, nullptr }, +#endif } }; #endif @@ -1323,6 +1347,64 @@ void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int & if (isExplicitMTS) { +#if JVET_W0103_INTRA_MTS + if (tu.mtsIdx[compID] > MTS_SKIP && CU::isIntra(*tu.cu)) + { + CHECK(compID != COMPONENT_Y, " MTS activated for chroma"); + uint32_t width = tu.blocks[compID].width; + uint32_t height = tu.blocks[compID].height; + int TrIdx = (tu.mtsIdx[compID] - MTS_DST7_DST7); + CHECK(width < 4 || height < 4, "width < 4 || height < 4 for MTS"); + uint8_t nSzIdxW = std::min(3, (floorLog2(width) - 2)); + uint8_t nSzIdxH = std::min(3, (floorLog2(height) - 2)); + const CompArea& area = tu.blocks[compID]; + int predMode = PU::getFinalIntraMode(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID)); +#if JVET_W0123_TIMD_FUSION + if (tu.cu->timd && compID == COMPONENT_Y) + { + predMode = MAP131TO67(predMode); + } +#endif + int ucMode; + int nMdIdx; + bool isTrTransposed = false; + if (tu.cu->mipFlag) //MIP is treated as planar. + { + ucMode = 0; + nMdIdx = 35; + isTrTransposed = (tu.cs->getPU(area.pos(), toChannelType(compID)))->mipTransposedFlag; + } + else + { + ucMode = predMode; //"ucMode" is the signaled Mode. + predMode = PU::getWideAngle(tu, (uint32_t)predMode, compID); + CHECK(predMode < -(NUM_EXT_LUMA_MODE >> 1) && predMode >= NUM_LUMA_MODE + (NUM_EXT_LUMA_MODE >> 1), "luma mode out of range"); + predMode = (predMode < 0) ? 2 : (predMode >= NUM_LUMA_MODE) ? 66 : predMode; + nMdIdx = predMode > DIA_IDX ? (NUM_LUMA_MODE + 1 - predMode) : predMode; + isTrTransposed = (predMode > DIA_IDX) ? true : false; + } + uint8_t nSzIdx = isTrTransposed ? (nSzIdxH * 4 + nSzIdxW) : (nSzIdxW * 4 + nSzIdxH); + CHECK(nSzIdx >= 16, "nSzIdx >= 16"); + CHECK(nMdIdx >= 36, "nMdIdx >= 36"); + uint8_t nTrSet = g_aucIpmToTrSet[nSzIdx][nMdIdx]; + CHECK(nTrSet >= 80, "nTrSet >= 80"); + trTypeVer = g_aucTrIdxToTr[g_aucTrSet[nTrSet][TrIdx]][predMode > DIA_IDX ? 1 : 0]; + trTypeHor = g_aucTrIdxToTr[g_aucTrSet[nTrSet][TrIdx]][predMode > DIA_IDX ? 0 : 1]; + predMode = ucMode; //to Check IDTR criteria, signaled mode should be used to check the difference + if (TrIdx == 3 && width <= 16 && height <= 16) + { + if (abs(predMode - HOR_IDX) <= g_aiIdLut[floorLog2(width) - 2][floorLog2(height) - 2]) + { + trTypeVer = IDTR; + } + if (abs(predMode - VER_IDX) <= g_aiIdLut[floorLog2(width) - 2][floorLog2(height) - 2]) + { + trTypeHor = IDTR; + } + } + } + else +#endif if (tu.mtsIdx[compID] > MTS_SKIP) { int indHor = (tu.mtsIdx[compID] - MTS_DST7_DST7) & 1; @@ -1621,7 +1703,35 @@ void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const itC++; } } +#if JVET_W0103_INTRA_MTS +// does transform for MTS candidates and return absSum of unquant Coeffs. +uint64_t TrQuant::transformNxN(TransformUnit& tu) +{ + CHECK(!tu.cu->mtsFlag, "mtsFlag should be on for selection"); + CodingStructure &cs = *tu.cs; + const CompArea &rect = tu.blocks[COMPONENT_Y]; + const uint32_t uiWidth = rect.width; + const uint32_t uiHeight = rect.height; + + const CPelBuf resiBuf = cs.getResiBuf(rect); + CoeffBuf tempCoeff(m_mtsCoeffs[tu.mtsIdx[0]], rect); + xT(tu, COMPONENT_Y, resiBuf, tempCoeff, uiWidth, uiHeight); + + const TCoeff *dstCoeffBuf = tempCoeff.buf; + const int dstCoeffStride = tempCoeff.stride; + uint64_t coeffAbsSum = 0; + + for (int y = 0; y < uiHeight; y++) + { + for (int x = 0; x < uiWidth; x++) + { + coeffAbsSum += abs(dstCoeffBuf[(y * dstCoeffStride) + x]); + } + } + return coeffAbsSum; +} +#endif void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, TCoeff& uiAbsSum, const Ctx& ctx, const bool loadTr ) { CodingStructure &cs = *tu.cs; @@ -1803,7 +1913,11 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const PelBuf bufResiTemplateReshape(predResiTemplateReshape, uiWidth + uiHeight - 1, 1); int trHor, trVer; getTrTypes(tu, residCompID, trHor, trVer); +#if JVET_W0103_INTRA_MTS + int actualTrIdx = trHor * NUM_TRANS_TYPE + trVer; +#else int actualTrIdx = trHor * 3 + trVer; +#endif int log2Width = floorLog2(uiWidth); int log2Height = floorLog2(uiHeight); if(!g_resiBorderTemplate[log2Width-2][log2Height-2][actualTrIdx]) diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h index 8556825e4e00241ab087347b8a5609876dfea999..77d99074a3565ce3cce5c314fb2b589fe8dd38fd 100644 --- a/source/Lib/CommonLib/TrQuant.h +++ b/source/Lib/CommonLib/TrQuant.h @@ -157,6 +157,9 @@ public: void invTransformNxN (TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQPs); void transformNxN ( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, std::vector<TrMode>* trModes, const int maxCand ); void transformNxN ( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, TCoeff& uiAbsSum, const Ctx& ctx, const bool loadTr = false ); +#if JVET_W0103_INTRA_MTS + uint64_t transformNxN(TransformUnit& tu); +#endif void transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &compID, const TCoeff &resiDiff, TCoeff &coeff, const uint32_t &uiPos, const QpParam &cQP, const bool bUseHalfRoundingPoint); void invTrSkipDeQuantOneSample (TransformUnit &tu, const ComponentID &compID, const TCoeff &pcCoeff, Pel &reconSample, const uint32_t &uiPos, const QpParam &cQP); diff --git a/source/Lib/CommonLib/TrQuant_EMT.cpp b/source/Lib/CommonLib/TrQuant_EMT.cpp index e0e8ef8ccdb917ca379a3dc5a217694a0c2cbd6a..6912cb56b000dc05c8137b546b633b1fe45cea87 100644 --- a/source/Lib/CommonLib/TrQuant_EMT.cpp +++ b/source/Lib/CommonLib/TrQuant_EMT.cpp @@ -1973,4 +1973,289 @@ void fastInverseDCT8_B256( const TCoeff *src, TCoeff *dst, int shift, int line, { _fastInverseMM< 256 >( src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_trCoreDCT8P256[0] ); } +#endif +#if JVET_W0103_INTRA_MTS +//DCT5 +void fastForwardDCT5_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr4[DCT5][0]); +} + +void fastInverseDCT5_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr4[DCT5][0]); +} + +void fastForwardDCT5_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr8[DCT5][0]); +} + +void fastInverseDCT5_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr8[DCT5][0]); +} + +void fastForwardDCT5_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr16[DCT5][0]); +} + +void fastInverseDCT5_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr16[DCT5][0]); +} + +void fastForwardDCT5_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr32[DCT5][0]); +} + +void fastInverseDCT5_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr32[DCT5][0]); +} + +void fastForwardDCT5_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr64[DCT5][0]); +} + +void fastInverseDCT5_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr64[DCT5][0]); +} + +void fastForwardDCT5_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr128[DCT5][0]); +} + +void fastInverseDCT5_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr128[DCT5][0]); +} + +void fastForwardDCT5_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr256[DCT5][0]); +} + +void fastInverseDCT5_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr256[DCT5][0]); +} + +//DST4 +void fastForwardDST4_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr4[DST4][0]); +} + +void fastInverseDST4_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr4[DST4][0]); +} + +void fastForwardDST4_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr8[DST4][0]); +} + +void fastInverseDST4_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr8[DST4][0]); +} + +void fastForwardDST4_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr16[DST4][0]); +} + +void fastInverseDST4_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr16[DST4][0]); +} + +void fastForwardDST4_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr32[DST4][0]); +} + +void fastInverseDST4_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr32[DST4][0]); +} + +void fastForwardDST4_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr64[DST4][0]); +} + +void fastInverseDST4_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr64[DST4][0]); +} + +void fastForwardDST4_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr128[DST4][0]); +} + +void fastInverseDST4_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr128[DST4][0]); +} + +void fastForwardDST4_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr256[DST4][0]); +} + +void fastInverseDST4_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr256[DST4][0]); +} + +//DST1 +void fastForwardDST1_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr4[DST1][0]); +} + +void fastInverseDST1_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr4[DST1][0]); +} + +void fastForwardDST1_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr8[DST1][0]); +} + +void fastInverseDST1_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr8[DST1][0]); +} + +void fastForwardDST1_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr16[DST1][0]); +} + +void fastInverseDST1_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr16[DST1][0]); +} + +void fastForwardDST1_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr32[DST1][0]); +} + +void fastInverseDST1_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr32[DST1][0]); +} + +void fastForwardDST1_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr64[DST1][0]); +} + +void fastInverseDST1_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr64[DST1][0]); +} + +void fastForwardDST1_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr128[DST1][0]); +} + +void fastInverseDST1_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr128[DST1][0]); +} + +void fastForwardDST1_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr256[DST1][0]); +} + +void fastInverseDST1_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr256[DST1][0]); +} + +//IDTR +void fastForwardIDTR_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr4[IDTR][0]); +} + +void fastInverseIDTR_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr4[IDTR][0]); +} + +void fastForwardIDTR_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr8[IDTR][0]); +} + +void fastInverseIDTR_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr8[IDTR][0]); +} + +void fastForwardIDTR_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr16[IDTR][0]); +} + +void fastInverseIDTR_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr16[IDTR][0]); +} + +void fastForwardIDTR_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr32[IDTR][0]); +} + +void fastInverseIDTR_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr32[IDTR][0]); +} + +void fastForwardIDTR_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr64[IDTR][0]); +} + +void fastInverseIDTR_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr64[IDTR][0]); +} + +void fastForwardIDTR_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr128[IDTR][0]); +} + +void fastInverseIDTR_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr128[IDTR][0]); +} + +void fastForwardIDTR_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2) +{ + _fastForwardMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr256[IDTR][0]); +} + +void fastInverseIDTR_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum) +{ + _fastInverseMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr256[IDTR][0]); +} #endif \ No newline at end of file diff --git a/source/Lib/CommonLib/TrQuant_EMT.h b/source/Lib/CommonLib/TrQuant_EMT.h index 03d7491dce4ee3196c0cf5e17e339eb5cb1e6af1..2379795dacd6027157d2c3f085a199854d67b670 100644 --- a/source/Lib/CommonLib/TrQuant_EMT.h +++ b/source/Lib/CommonLib/TrQuant_EMT.h @@ -97,5 +97,78 @@ void fastForwardDCT8_B256( const TCoeff *src, TCoeff *dst, int shift, int line, void fastInverseDCT8_B256( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum ); #endif +#if JVET_W0103_INTRA_MTS +//DST-IV transforms +void fastForwardDST4_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST4_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST4_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST4_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST4_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST4_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST4_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST4_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST4_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST4_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +#if TU_256 +void fastForwardDST4_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST4_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST4_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST4_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +#endif + +//DST-I transforms +void fastForwardDST1_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST1_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST1_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST1_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST1_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST1_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST1_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST1_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST1_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST1_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +#if TU_256 +void fastForwardDST1_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST1_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDST1_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDST1_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +#endif + +//DCT-V transforms +void fastForwardDCT5_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDCT5_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDCT5_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDCT5_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDCT5_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDCT5_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDCT5_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDCT5_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDCT5_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDCT5_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +#if TU_256 +void fastForwardDCT5_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDCT5_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardDCT5_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseDCT5_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +#endif + +//IDTR transforms +void fastForwardIDTR_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseIDTR_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardIDTR_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseIDTR_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardIDTR_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseIDTR_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardIDTR_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseIDTR_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardIDTR_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseIDTR_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +#if TU_256 +void fastForwardIDTR_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseIDTR_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +void fastForwardIDTR_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2); +void fastInverseIDTR_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum); +#endif +#endif #endif // __TRQUANT__ diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 63d251c2662434aba131f547bb649856a53767d5..83d7c38c4c00b07468f3815a5f00b0e51a6bad89 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -52,10 +52,13 @@ + + #define BASE_ENCODER 1 #define BASE_NORMATIVE 1 #define TOOLS 1 + #if BASE_ENCODER // Lossy encoder speedups #define AFFINE_ENC_OPT 1 // Affine encoder optimization @@ -132,6 +135,8 @@ #define EXTENDED_LFNST 1 // Extended LFNST #define SIGN_PREDICTION 1 // transform coefficients sign prediction +#define JVET_W0103_INTRA_MTS 1 // JVET W0103: Extended Intra MTS + // Entropy Coding #define EC_HIGH_PRECISION 1 // CABAC high precision #define SLICE_TYPE_WIN_SIZE 1 // Context window initialization based on slice type @@ -157,7 +162,6 @@ #if JVET_V0094_BILATERAL_FILTER #define ENABLE_SIMD_BILATERAL_FILTER 1 #endif - #endif // tools @@ -541,11 +545,23 @@ enum QuantFlags //EMT transform tags enum TransType { +#if JVET_W0103_INTRA_MTS + DCT2 = 0, + DCT8 = 1, + DST7 = 2, + DCT5 = 3, + DST4 = 4, + DST1 = 5, + IDTR = 6, + NUM_TRANS_TYPE = 7, + DCT2_EMT = 8 +#else DCT2 = 0, DCT8 = 1, DST7 = 2, NUM_TRANS_TYPE = 3, DCT2_EMT = 4 +#endif }; enum MTSIdx diff --git a/source/Lib/CommonLib/x86/TrQuantX86.h b/source/Lib/CommonLib/x86/TrQuantX86.h index 5f7238fc60ea6aa9385a797399af4a7ccb3b524f..97f5215e77fc940b6dee55727666c623d283dc88 100644 --- a/source/Lib/CommonLib/x86/TrQuantX86.h +++ b/source/Lib/CommonLib/x86/TrQuantX86.h @@ -547,6 +547,12 @@ void TrQuant::_initTrQuantX86() { g_trCoreDCT2P2[TRANSFORM_FORWARD][0], g_trCoreDCT2P4[TRANSFORM_FORWARD][0], g_trCoreDCT2P8[TRANSFORM_FORWARD][0], g_trCoreDCT2P16[TRANSFORM_FORWARD][0], g_trCoreDCT2P32[TRANSFORM_FORWARD][0], g_trCoreDCT2P64[TRANSFORM_FORWARD][0], g_trCoreDCT2P128[TRANSFORM_FORWARD][0], g_trCoreDCT2P256[0] }, { nullptr, g_trCoreDCT8P4[TRANSFORM_FORWARD][0], g_trCoreDCT8P8[TRANSFORM_FORWARD][0], g_trCoreDCT8P16[TRANSFORM_FORWARD][0], g_trCoreDCT8P32[TRANSFORM_FORWARD][0], g_trCoreDCT8P64[TRANSFORM_FORWARD][0], g_trCoreDCT8P128[TRANSFORM_FORWARD][0], g_trCoreDCT8P256[0] }, { nullptr, g_trCoreDST7P4[TRANSFORM_FORWARD][0], g_trCoreDST7P8[TRANSFORM_FORWARD][0], g_trCoreDST7P16[TRANSFORM_FORWARD][0], g_trCoreDST7P32[TRANSFORM_FORWARD][0], g_trCoreDST7P64[TRANSFORM_FORWARD][0], g_trCoreDST7P128[TRANSFORM_FORWARD][0], g_trCoreDST7P256[0] }, +#if JVET_W0103_INTRA_MTS + { nullptr, g_aiTr4[DCT5][0], g_aiTr8[DCT5][0], g_aiTr16[DCT5][0], g_aiTr32[DCT5][0], g_aiTr64[DCT5][0], g_aiTr128[DCT5][0], g_aiTr256[DCT5][0] }, + { nullptr, g_aiTr4[DST4][0], g_aiTr8[DST4][0], g_aiTr16[DST4][0], g_aiTr32[DST4][0], g_aiTr64[DST4][0], g_aiTr128[DST4][0], g_aiTr256[DST4][0] }, + { nullptr, g_aiTr4[DST1][0], g_aiTr8[DST1][0], g_aiTr16[DST1][0], g_aiTr32[DST1][0], g_aiTr64[DST1][0], g_aiTr128[DST1][0], g_aiTr256[DST1][0] }, + { nullptr, g_aiTr4[IDTR][0], g_aiTr8[IDTR][0], g_aiTr16[IDTR][0], g_aiTr32[IDTR][0], g_aiTr64[IDTR][0], g_aiTr128[IDTR][0], g_aiTr256[IDTR][0] }, +#endif } }; m_inverseTransformKernels = @@ -554,6 +560,12 @@ void TrQuant::_initTrQuantX86() { g_trCoreDCT2P2[TRANSFORM_INVERSE][0], g_trCoreDCT2P4[TRANSFORM_INVERSE][0], g_trCoreDCT2P8[TRANSFORM_INVERSE][0], g_trCoreDCT2P16[TRANSFORM_INVERSE][0], g_trCoreDCT2P32[TRANSFORM_INVERSE][0], g_trCoreDCT2P64[TRANSFORM_INVERSE][0], g_trCoreDCT2P128[TRANSFORM_INVERSE][0], g_trCoreDCT2P256[0] }, { nullptr, g_trCoreDCT8P4[TRANSFORM_INVERSE][0], g_trCoreDCT8P8[TRANSFORM_INVERSE][0], g_trCoreDCT8P16[TRANSFORM_INVERSE][0], g_trCoreDCT8P32[TRANSFORM_INVERSE][0], g_trCoreDCT8P64[TRANSFORM_INVERSE][0], g_trCoreDCT8P128[TRANSFORM_INVERSE][0], g_trCoreDCT8P256[0] }, { nullptr, g_trCoreDST7P4[TRANSFORM_INVERSE][0], g_trCoreDST7P8[TRANSFORM_INVERSE][0], g_trCoreDST7P16[TRANSFORM_INVERSE][0], g_trCoreDST7P32[TRANSFORM_INVERSE][0], g_trCoreDST7P64[TRANSFORM_INVERSE][0], g_trCoreDST7P128[TRANSFORM_INVERSE][0], g_trCoreDST7P256[0] }, +#if JVET_W0103_INTRA_MTS + { nullptr, g_aiTr4[DCT5][0], g_aiTr8[DCT5][0], g_aiTr16[DCT5][0], g_aiTr32[DCT5][0], g_aiTr64[DCT5][0], g_aiTr128[DCT5][0], g_aiTr256[DCT5][0] }, + { nullptr, g_aiTr4[DST4][0], g_aiTr8[DST4][0], g_aiTr16[DST4][0], g_aiTr32[DST4][0], g_aiTr64[DST4][0], g_aiTr128[DST4][0], g_aiTr256[DST4][0] }, + { nullptr, g_aiTr4[DST1][0], g_aiTr8[DST1][0], g_aiTr16[DST1][0], g_aiTr32[DST1][0], g_aiTr64[DST1][0], g_aiTr128[DST1][0], g_aiTr256[DST1][0] }, + { nullptr, g_aiTr4[IDTR][0], g_aiTr8[IDTR][0], g_aiTr16[IDTR][0], g_aiTr32[IDTR][0], g_aiTr64[IDTR][0], g_aiTr128[IDTR][0], g_aiTr256[IDTR][0] }, +#endif } }; fastFwdTrans[0][0] = fastForwardTransform_SIMD<DCT2, 2>; @@ -583,6 +595,44 @@ void TrQuant::_initTrQuantX86() fastFwdTrans[2][6] = fastForwardTransform_SIMD<DST7, 128>; fastFwdTrans[2][7] = fastForwardTransform_SIMD<DST7, 256>; +#if JVET_W0103_INTRA_MTS + fastFwdTrans[3][0] = nullptr; + fastFwdTrans[3][1] = fastForwardTransform_SIMD<DCT5, 4>; + fastFwdTrans[3][2] = fastForwardTransform_SIMD<DCT5, 8>; + fastFwdTrans[3][3] = fastForwardTransform_SIMD<DCT5, 16>; + fastFwdTrans[3][4] = fastForwardTransform_SIMD<DCT5, 32>; + fastFwdTrans[3][5] = fastForwardTransform_SIMD<DCT5, 64>; + fastFwdTrans[3][6] = fastForwardTransform_SIMD<DCT5, 128>; + fastFwdTrans[3][7] = fastForwardTransform_SIMD<DCT5, 256>; + + fastFwdTrans[4][0] = nullptr; + fastFwdTrans[4][1] = fastForwardTransform_SIMD<DST4, 4>; + fastFwdTrans[4][2] = fastForwardTransform_SIMD<DST4, 8>; + fastFwdTrans[4][3] = fastForwardTransform_SIMD<DST4, 16>; + fastFwdTrans[4][4] = fastForwardTransform_SIMD<DST4, 32>; + fastFwdTrans[4][5] = fastForwardTransform_SIMD<DST4, 64>; + fastFwdTrans[4][6] = fastForwardTransform_SIMD<DST4, 128>; + fastFwdTrans[4][7] = fastForwardTransform_SIMD<DST4, 256>; + + fastFwdTrans[5][0] = nullptr; + fastFwdTrans[5][1] = fastForwardTransform_SIMD<DST1, 4>; + fastFwdTrans[5][2] = fastForwardTransform_SIMD<DST1, 8>; + fastFwdTrans[5][3] = fastForwardTransform_SIMD<DST1, 16>; + fastFwdTrans[5][4] = fastForwardTransform_SIMD<DST1, 32>; + fastFwdTrans[5][5] = fastForwardTransform_SIMD<DST1, 64>; + fastFwdTrans[5][6] = fastForwardTransform_SIMD<DST1, 128>; + fastFwdTrans[5][7] = fastForwardTransform_SIMD<DST1, 256>; + + fastFwdTrans[6][0] = nullptr; + fastFwdTrans[6][1] = fastForwardTransform_SIMD<IDTR, 4>; + fastFwdTrans[6][2] = fastForwardTransform_SIMD<IDTR, 8>; + fastFwdTrans[6][3] = fastForwardTransform_SIMD<IDTR, 16>; + fastFwdTrans[6][4] = fastForwardTransform_SIMD<IDTR, 32>; + fastFwdTrans[6][5] = fastForwardTransform_SIMD<IDTR, 64>; + fastFwdTrans[6][6] = fastForwardTransform_SIMD<IDTR, 128>; + fastFwdTrans[6][7] = fastForwardTransform_SIMD<IDTR, 256>; +#endif + fastInvTrans[0][0] = fastInverseTransform_SIMD<DCT2, 2>; fastInvTrans[0][1] = fastInverseTransform_SIMD<DCT2, 4>; fastInvTrans[0][2] = fastInverseTransform_SIMD<DCT2, 8>; @@ -609,6 +659,44 @@ void TrQuant::_initTrQuantX86() fastInvTrans[2][5] = fastInverseTransform_SIMD<DST7, 64>; fastInvTrans[2][6] = fastInverseTransform_SIMD<DST7, 128>; fastInvTrans[2][7] = fastInverseTransform_SIMD<DST7, 256>; + +#if JVET_W0103_INTRA_MTS + fastInvTrans[3][0] = nullptr; + fastInvTrans[3][1] = fastInverseTransform_SIMD<DCT5, 4>; + fastInvTrans[3][2] = fastInverseTransform_SIMD<DCT5, 8>; + fastInvTrans[3][3] = fastInverseTransform_SIMD<DCT5, 16>; + fastInvTrans[3][4] = fastInverseTransform_SIMD<DCT5, 32>; + fastInvTrans[3][5] = fastInverseTransform_SIMD<DCT5, 64>; + fastInvTrans[3][6] = fastInverseTransform_SIMD<DCT5, 128>; + fastInvTrans[3][7] = fastInverseTransform_SIMD<DCT5, 256>; + + fastInvTrans[4][0] = nullptr; + fastInvTrans[4][1] = fastInverseTransform_SIMD<DST4, 4>; + fastInvTrans[4][2] = fastInverseTransform_SIMD<DST4, 8>; + fastInvTrans[4][3] = fastInverseTransform_SIMD<DST4, 16>; + fastInvTrans[4][4] = fastInverseTransform_SIMD<DST4, 32>; + fastInvTrans[4][5] = fastInverseTransform_SIMD<DST4, 64>; + fastInvTrans[4][6] = fastInverseTransform_SIMD<DST4, 128>; + fastInvTrans[4][7] = fastInverseTransform_SIMD<DST4, 256>; + + fastInvTrans[5][0] = nullptr; + fastInvTrans[5][1] = fastInverseTransform_SIMD<DST1, 4>; + fastInvTrans[5][2] = fastInverseTransform_SIMD<DST1, 8>; + fastInvTrans[5][3] = fastInverseTransform_SIMD<DST1, 16>; + fastInvTrans[5][4] = fastInverseTransform_SIMD<DST1, 32>; + fastInvTrans[5][5] = fastInverseTransform_SIMD<DST1, 64>; + fastInvTrans[5][6] = fastInverseTransform_SIMD<DST1, 128>; + fastInvTrans[5][7] = fastInverseTransform_SIMD<DST1, 256>; + + fastInvTrans[6][0] = nullptr; + fastInvTrans[6][1] = fastInverseTransform_SIMD<IDTR, 4>; + fastInvTrans[6][2] = fastInverseTransform_SIMD<IDTR, 8>; + fastInvTrans[6][3] = fastInverseTransform_SIMD<IDTR, 16>; + fastInvTrans[6][4] = fastInverseTransform_SIMD<IDTR, 32>; + fastInvTrans[6][5] = fastInverseTransform_SIMD<IDTR, 64>; + fastInvTrans[6][6] = fastInverseTransform_SIMD<IDTR, 128>; + fastInvTrans[6][7] = fastInverseTransform_SIMD<IDTR, 256>; +#endif #else m_forwardTransformKernels = { { diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index a0ced2ca8a07d59063fdaf837ccf7d5bb7354ec7..2a7113eee1f8149d4147ed596a9b19ad3845ba2d 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -3858,11 +3858,21 @@ void CABACReader::mts_idx( CodingUnit& cu, CUCtx& cuCtx ) cuCtx.mtsLastScanPos && cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[COMPONENT_Y], COMPONENT_Y ); +#if JVET_W0103_INTRA_MTS + int ctxIdx = (cu.mipFlag) ? 3 : 0; +#else int ctxIdx = 0; +#endif int symbol = m_BinDecoder.decodeBin( Ctx::MTSIdx(ctxIdx)); if( symbol ) { +#if JVET_W0103_INTRA_MTS + int bins[2]; + bins[0] = m_BinDecoder.decodeBin(Ctx::MTSIdx(1)); + bins[1] = m_BinDecoder.decodeBin(Ctx::MTSIdx(2)); + mtsIdx = MTS_DST7_DST7 + (bins[0] << 1) + bins[1]; +#else ctxIdx = 1; mtsIdx = MTS_DST7_DST7; // mtsIdx = 2 -- 4 for( int i = 0; i < 3; i++, ctxIdx++ ) @@ -3875,6 +3885,7 @@ void CABACReader::mts_idx( CodingUnit& cu, CUCtx& cuCtx ) break; } } +#endif } } diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index 4904308cf850d1f40e3d256220d054e1791b8dc5..01b001838402ac20b70d35d1f1d8fcb418cfdc66 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -3604,12 +3604,22 @@ void CABACWriter::mts_idx( const CodingUnit& cu, CUCtx* cuCtx ) cuCtx->mtsLastScanPos && cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP) { int symbol = mtsIdx != MTS_DCT2_DCT2 ? 1 : 0; +#if JVET_W0103_INTRA_MTS + int ctxIdx = (cu.mipFlag) ? 3 : 0; +#else int ctxIdx = 0; +#endif m_BinEncoder.encodeBin( symbol, Ctx::MTSIdx(ctxIdx)); if( symbol ) { +#if JVET_W0103_INTRA_MTS + int TrIdx = (tu.mtsIdx[COMPONENT_Y] - MTS_DST7_DST7); + CHECK(TrIdx < 0 || TrIdx >= 4, "TrIdx outside range"); + m_BinEncoder.encodeBin(TrIdx >> 1, Ctx::MTSIdx(1)); + m_BinEncoder.encodeBin(TrIdx & 1, Ctx::MTSIdx(2)); +#else ctxIdx = 1; for( int i = 0; i < 3; i++, ctxIdx++ ) { @@ -3621,6 +3631,7 @@ void CABACWriter::mts_idx( const CodingUnit& cu, CUCtx* cuCtx ) break; } } +#endif } } DTRACE( g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx); diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 2142260474e0634d82ada0799cc17cc8bad0f82a..d6dc2b317dd81897ab461b923c9eca00e19e1048 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -1832,6 +1832,9 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS bool trGrpCheck [ 4 ] = { true, true, true, true }; int startMTSIdx [ 4 ] = { 0, 1, 2, 3 }; int endMTSIdx [ 4 ] = { 0, 1, 2, 3 }; +#if JVET_W0103_INTRA_MTS + endMTSIdx[0] = 3; //put all MTS candidates in "Grp 0" +#endif double trGrpStopThreshold[ 3 ] = { 1.001, 1.001, 1.001 }; int bestMtsFlag = 0; int bestLfnstIdx = 0; @@ -1857,8 +1860,11 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS bool skipOtherLfnst = false; int startLfnstIdx = 0; int endLfnstIdx = sps.getUseLFNST() ? maxLfnstIdx : 0; - +#if JVET_W0103_INTRA_MTS + int grpNumMax = 1; +#else int grpNumMax = sps.getUseLFNST() ? m_pcEncCfg->getMTSIntraMaxCand() : 1; +#endif m_modeCtrl->setISPWasTested(false); m_pcIntraSearch->invalidateBestModeCost(); if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS)) @@ -2222,7 +2228,6 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS xCalDebCost( *tempCS, partitioner ); tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); - #if WCG_EXT DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); #else @@ -2237,6 +2242,7 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS bestCS->tmpColorSpaceIntraCost[colorSpaceIdx] = tempCS->cost; } } + if( !sps.getUseLFNST() ) { xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); @@ -2249,6 +2255,7 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS bestSelFlag [ trGrpIdx ] = true; bestMtsFlag = mtsFlag; bestLfnstIdx = lfnstIdx; + if( bestCS->cus.size() == 1 ) { CodingUnit &cu = *bestCS->cus.front(); @@ -2261,7 +2268,7 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS } } } - + //we decide to skip the non-DCT-II transforms and LFNST according to the ISP results if ((endMtsFlag > 0 || endLfnstIdx > 0) && (cu.ispMode || (bestCS && bestCS->cus[0]->ispMode)) && tempCS->slice->isIntra() && m_pcEncCfg->getUseFastISP()) { @@ -2269,7 +2276,7 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS double bestIspCost = m_modeCtrl->getIspCost(); CHECKD( bestCostDct2NoIsp <= bestIspCost, "wrong cost!" ); double threshold = 1.4; - + double lfnstThreshold = 1.01 * threshold; if( m_modeCtrl->getStopNonDCT2Transforms() || bestCostDct2NoIsp > bestIspCost*lfnstThreshold ) { @@ -2294,6 +2301,16 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS break; } } +#if JVET_W0103_INTRA_MTS + if (lfnstIdx && m_modeCtrl->getMtsFirstPassNoIspCost() != MAX_DOUBLE && isLuma(partitioner.chType)) + { + double threshold = 1.5; + if (m_modeCtrl->getMtsFirstPassNoIspCost() > threshold * bestCS->cost) + { + endLfnstIdx = lfnstIdx; + } + } +#endif } } //for emtCuFlag diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index aa634aa9dcd750952c764c140dd7b54ea5af8cd0..23c47b245bcc8b23b11a4dfb0283591275026a20 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -380,6 +380,45 @@ double IntraSearch::findInterCUCost( CodingUnit &cu ) return COST_UNKNOWN; } #endif +#if JVET_W0103_INTRA_MTS +bool IntraSearch::testISPforCurrCU(const CodingUnit &cu) +{ + CodingStructure &cs = *cu.cs; + auto &pu = *cu.firstPU; + const CompArea &area = pu.Y(); + PelBuf piOrg = cs.getOrgBuf(area); + + Pel* pOrg = piOrg.buf; + int uiWidth = area.width; + int uiHeight = area.height; + int iStride = piOrg.stride; + int Gsum = 0; + int nPix = (uiWidth - 2) * (uiHeight - 2); + for (int y = 1; y < (uiHeight - 1); y++) + { + for (int x = 1; x < (uiWidth - 1); x++) + { + const Pel *p = pOrg + y * iStride + x; + + int iDy = p[-iStride - 1] + 2 * p[-1] + p[iStride - 1] - p[-iStride + 1] - 2 * p[+1] - p[iStride + 1]; + int iDx = p[iStride - 1] + 2 * p[iStride] + p[iStride + 1] - p[-iStride - 1] - 2 * p[-iStride] - p[-iStride + 1]; + + if (iDy == 0 && iDx == 0) + continue; + + int iAmp = (int)(abs(iDx) + abs(iDy)); + Gsum += iAmp; + } + } + Gsum = (Gsum + (nPix >> 1)) / nPix; + + bool testISP = true; + CHECK(m_numModesISPRDO != -1, "m_numModesISPRDO!=-1"); + + m_numModesISPRDO = (Gsum < 50 && uiWidth >= 16 && uiHeight >= 16) ? 1 : 2; + return testISP; +} +#endif bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst, CodingStructure* bestCS) { CodingStructure &cs = *cu.cs; @@ -441,6 +480,18 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c mtsUsageFlag = 0; } +#if JVET_W0103_INTRA_MTS + if (!cu.mtsFlag && !cu.lfnstIdx) + { + m_globalBestCostStore = MAX_DOUBLE; + m_globalBestCostValid = false; + if (bestCS->getCU(partitioner.chType) != NULL && bestCS->getCU(partitioner.chType)->predMode != MODE_INTRA && bestCostSoFar != MAX_DOUBLE) + { + m_globalBestCostStore = bestCostSoFar; + m_globalBestCostValid = true; + } + } +#endif const bool colorTransformIsEnabled = sps.getUseColorTrans() && !CS::isDualITree(cs); const bool isFirstColorSpace = colorTransformIsEnabled && ((m_pcEncCfg->getRGBFormatFlag() && cu.colorTransform) || (!m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform)); const bool isSecondColorSpace = colorTransformIsEnabled && ((m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform) || (!m_pcEncCfg->getRGBFormatFlag() && cu.colorTransform)); @@ -449,7 +500,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c bool ispCanBeUsed = sps.getUseISP() && cu.mtsFlag == 0 && cu.lfnstIdx == 0 && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize()); bool saveDataForISP = ispCanBeUsed && (!colorTransformIsEnabled || isFirstColorSpace); bool testISP = ispCanBeUsed && (!colorTransformIsEnabled || !cu.colorTransform); - +#if JVET_W0103_INTRA_MTS + if (testISP && m_pcEncCfg->getUseFastISP()) + { + m_numModesISPRDO = -1; + testISP &= testISPforCurrCU(cu); + } +#endif if ( saveDataForISP ) { //reset the intra modes lists variables @@ -1016,7 +1073,25 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c if ((m_pcEncCfg->getUseFastLFNST() || !cu.slice->isIntra()) && m_bestModeCostValid[lfnstIdx]) { numModesForFullRD = 0; +#if JVET_W0103_INTRA_MTS + double thresholdSkipMode = 1.0 + ((cu.lfnstIdx > 0) ? 0.1 : 0.8) * (1.4 / sqrt((double)(width * height))); + std::vector<std::pair<ModeInfo, double>> ModeInfoWithDCT2Cost(m_savedNumRdModes[0]); + for (int i = 0; i < m_savedNumRdModes[0]; i++) + { + ModeInfoWithDCT2Cost[i] = { m_savedRdModeList[0][i], m_modeCostStore[0][i] }; + } + std::stable_sort(ModeInfoWithDCT2Cost.begin(), ModeInfoWithDCT2Cost.end(), [](const std::pair<ModeInfo, double> & l, const std::pair<ModeInfo, double> & r) {return l.second < r.second; }); + // **Reorder the modes** and Skip checking the modes with much larger R-D cost than the best mode + for (int i = 0; i < m_savedNumRdModes[0]; i++) + { + if (ModeInfoWithDCT2Cost[i].second <= thresholdSkipMode * ModeInfoWithDCT2Cost[0].second) + { + uiRdModeList.push_back(ModeInfoWithDCT2Cost[i].first); + numModesForFullRD++; + } + } +#else double thresholdSkipMode = 1.0 + ((cu.lfnstIdx > 0) ? 0.1 : 1.0) * (1.4 / sqrt((double) (width * height))); // Skip checking the modes with much larger R-D cost than the best mode @@ -1028,6 +1103,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c numModesForFullRD++; } } +#endif } else // this is necessary because we skip the candidates list calculation, since it was already obtained for // the DCT-II. Now we load it @@ -1060,7 +1136,6 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c CHECK(numModesForFullRD != uiRdModeList.size(), "Inconsistent state!"); #endif // after this point, don't use numModesForFullRD - // PBINTRA fast if (m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable && !cs.slice->getDisableSATDForRD() && (mtsUsageFlag != 2 || lfnstIdx > 0)) @@ -1270,6 +1345,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif #if JVET_V0130_INTRA_TMP cu.tmpFlag = uiOrgMode.tmpFlag; +#if JVET_W0103_INTRA_MTS + if (cu.tmpFlag && cu.mtsFlag) continue; +#endif #endif cu.mipFlag = uiOrgMode.mipFlg; pu.mipTransposedFlag = uiOrgMode.mipTrFlg; @@ -1426,6 +1504,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c sortRdModeListFirstColorSpace(uiOrgMode, csTemp->cost, cu.bdpcmMode, m_savedRdModeFirstColorSpace[m_savedRdModeIdx], m_savedRdCostFirstColorSpace[m_savedRdModeIdx], m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx], m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx]); } } + // check r-d cost if( csTemp->cost < csBest->cost ) { @@ -1444,6 +1523,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost; m_bestModeCostValid[ lfnstIdx ] = true; } +#if JVET_W0103_INTRA_MTS + if (sps.getUseLFNST() && m_globalBestCostStore > csBest->cost) + { + m_globalBestCostStore = csBest->cost; + m_globalBestCostValid = true; + } +#endif if( csBest->cost < bestCurrentCost ) { bestCurrentCost = csBest->cost; @@ -3411,7 +3497,98 @@ uint64_t IntraSearch::xGetIntraFracBitsQTChroma(TransformUnit& currTU, const Com uint64_t fracBits = m_CABACEstimator->getEstFracBits(); return fracBits; } +#if JVET_W0103_INTRA_MTS +void IntraSearch::xSelectAMTForFullRD(TransformUnit &tu) +{ + if (!tu.blocks[COMPONENT_Y].valid()) + { + return; + } + + if (!tu.cu->mtsFlag) + { + return; + } + + CodingStructure &cs = *tu.cs; + m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc()); + + const CompArea &area = tu.blocks[COMPONENT_Y]; + + const ChannelType chType = toChannelType(COMPONENT_Y); + + + PelBuf piOrg = cs.getOrgBuf(area); + PelBuf piPred = cs.getPredBuf(area); + PelBuf piResi = cs.getResiBuf(area); + + const PredictionUnit &pu = *cs.getPU(area.pos(), chType); + + //===== init availability pattern ===== + + PelBuf sharedPredTS(m_pSharedPredTransformSkip[COMPONENT_Y], area); + initIntraPatternChType(*tu.cu, area); + + //===== get prediction signal ===== + if (PU::isMIP(pu, chType)) + { + initIntraMip(pu, area); + predIntraMip(COMPONENT_Y, piPred, pu); + } + else + { + predIntraAng(COMPONENT_Y, piPred, pu); + } + + + // save prediction + sharedPredTS.copyFrom(piPred); + + const Slice &slice = *cs.slice; + //===== get residual signal ===== + piResi.copyFrom(piOrg); + if (slice.getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) + { + piResi.rspSignal(m_pcReshape->getFwdLUT()); + piResi.subtract(piPred); + } + else + { + piResi.subtract(piPred); + } + // do transform and calculate Coeff AbsSum for all MTS candidates + std::vector<std::pair<int, uint64_t>> CoeffAbsSum(4); + + for (int i = 0; i < 4; i++) + { + tu.mtsIdx[0] = i + MTS_DST7_DST7; + uint64_t AbsSum = m_pcTrQuant->transformNxN(tu); + CoeffAbsSum[i] = { i, AbsSum }; + } + std::stable_sort(CoeffAbsSum.begin(), CoeffAbsSum.end(), [](const std::pair<int, uint64_t> & l, const std::pair<int, uint64_t> & r) {return l.second < r.second; }); + + for (int i = 0; i < 4; i++) + { + m_TestAMTForFullRD[i] = CoeffAbsSum[i].first; + } + m_numCandAMTForFullRD = 4; + + if (m_pcEncCfg->getUseFastLFNST()) + { + double skipThreshold = 1.0 + 1.0 / sqrt((double)(area.width*area.height)); + skipThreshold = std::max(skipThreshold, 1.03); + for (int i = 1; i < m_numCandAMTForFullRD; i++) + { + if (CoeffAbsSum[i].second > skipThreshold * CoeffAbsSum[0].second) + { + m_numCandAMTForFullRD = i; + break; + } + } + } +} +#endif void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, const int &default0Save1Load2, uint32_t* numSig, std::vector<TrMode>* trModes, const bool loadTr) { if (!tu.blocks[compID].valid()) @@ -3532,7 +3709,11 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp const Slice &slice = *cs.slice; bool flag = slice.getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag())); +#if JVET_W0103_INTRA_MTS + if (!tu.cu->mtsFlag && isLuma(compID)) +#else if (isLuma(compID)) +#endif { //===== get residual signal ===== piResi.copyFrom( piOrg ); @@ -4254,13 +4435,21 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par bool cbfBestMode = false; bool cbfBestModeValid = false; bool cbfDCT2 = true; - +#if JVET_W0103_INTRA_MTS + if (sps.getUseLFNST() && cu.mtsFlag) xSelectAMTForFullRD(tu); +#endif double bestDCT2cost = MAX_DOUBLE; double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && nNumTransformCands > 1 ? 1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1; for( int modeId = firstCheckId; modeId <= ( sps.getUseLFNST() ? lastCheckId : ( nNumTransformCands - 1 ) ); modeId++ ) { uint8_t transformIndex = modeId; - +#if JVET_W0103_INTRA_MTS + if (sps.getUseLFNST() && cu.mtsFlag) + { + if (modeId >= m_numCandAMTForFullRD) continue; + transformIndex = m_TestAMTForFullRD[modeId]; + } +#endif if( sps.getUseLFNST() ) { if( ( transformIndex < lastCheckId ) || ( ( transformIndex == lastCheckId ) && !checkTransformSkip ) ) //we avoid this if the mode is transformSkip @@ -4370,6 +4559,12 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par } } } +#if JVET_W0103_INTRA_MTS + else if (cu.mtsFlag) + { + xIntraCodingTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, 2, &numSig, nullptr, true); + } +#endif else { xIntraCodingTUBlock( tu, COMPONENT_Y, singleDistTmpLuma, default0Save1Load2, &numSig ); @@ -4443,7 +4638,21 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par { bestDCT2cost = singleCostTmp; } - +#if JVET_W0103_INTRA_MTS + if (sps.getUseLFNST() && cu.mtsFlag) + { + if (singleCostTmp != MAX_DOUBLE) + { + const CompArea& area = tu.blocks[COMPONENT_Y]; + double skipThreshold = 1.0 + 1.0 / sqrt((double)(area.width*area.height)); + skipThreshold = std::max(skipThreshold, !m_pcEncCfg->getUseFastLFNST()? 1.06: 1.03); + if (singleCostTmp > skipThreshold * m_globalBestCostStore) + { + m_numCandAMTForFullRD = modeId + 1; + } + } + } +#endif if (singleCostTmp < dSingleCost) { dSingleCost = singleCostTmp; @@ -6512,13 +6721,18 @@ bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost, ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, bestISPModeInRelCU)); modeIsInList[bestISPModeInRelCU] = true; } - // Planar +#if JVET_W0103_INTRA_MTS + // push planar later when FastISP is on. + if (!m_pcEncCfg->getUseFastISP() && !modeIsInList[mode1]) +#else if (!modeIsInList[mode1]) +#endif { destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, mode1)); modeIsInList[mode1] = true; } + // Best angle in regular intra if (mode2 != -1 && !modeIsInList[mode2]) { @@ -6543,7 +6757,14 @@ bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost, } } } - +#if JVET_W0103_INTRA_MTS + // Planar (after angular modes when FastISP is on) + if (!modeIsInList[mode1]) + { + destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, mode1)); + modeIsInList[mode1] = true; + } +#endif // DC is added after the angles from regular intra if (dcModeIndex != -1 && !modeIsInList[DC_IDX]) { @@ -6555,6 +6776,12 @@ bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost, for (int j = 0; j < NUM_LFNST_NUM_PER_SET; j++) { m_ispTestedModes[j].numOrigModesToTest = (int)destListPtr->size(); +#if JVET_W0103_INTRA_MTS + if (m_pcEncCfg->getUseFastISP() && m_numModesISPRDO != -1 && destListPtr->size() > m_numModesISPRDO) + { + m_ispTestedModes[j].numOrigModesToTest = m_numModesISPRDO; + } +#endif } const int addedModesFromHadList = 3; int newModesAdded = 0; diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h index 31b813dabccf8007b977a5b4d48c8ed6f2e78a89..6afe99d63fa7dc88ec2be3722b77c29fa091e9ca 100644 --- a/source/Lib/EncoderLib/IntraSearch.h +++ b/source/Lib/EncoderLib/IntraSearch.h @@ -198,7 +198,10 @@ class IntraSearch : public IntraPrediction private: EncModeCtrl *m_modeCtrl; Pel* m_pSharedPredTransformSkip[MAX_NUM_TBLOCKS]; - +#if JVET_W0103_INTRA_MTS + int m_TestAMTForFullRD[4]; + int m_numCandAMTForFullRD; +#endif XUCache m_unitCache; CodingStructure ****m_pSplitCS; @@ -371,7 +374,11 @@ private: double m_modeCostStore[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ]; // RD cost of each mode for each PU using DCT2 ModeInfo m_savedRdModeList[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ]; int32_t m_savedNumRdModes[ NUM_LFNST_NUM_PER_SET ]; - +#if JVET_W0103_INTRA_MTS + double m_globalBestCostStore; + bool m_globalBestCostValid; + int m_numModesISPRDO; //full modes for ISP testing. +#endif ModeInfo m_savedRdModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM]; char m_savedBDPCMModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM]; double m_savedRdCostFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM]; @@ -494,7 +501,10 @@ protected: void xIntraCodingTUBlock (TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, const int &default0Save1Load2 = 0, uint32_t* numSig = nullptr, std::vector<TrMode>* trModes=nullptr, const bool loadTr=false ); void xIntraCodingACTTUBlock(TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, std::vector<TrMode>* trModes = nullptr, const bool loadTr = false); - +#if JVET_W0103_INTRA_MTS + void xSelectAMTForFullRD(TransformUnit &tu); + bool testISPforCurrCU(const CodingUnit &cu); +#endif ChromaCbfs xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const PartSplit ispType = TU_NO_ISP ); bool xRecurIntraCodingLumaQT ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinner = false, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false ); bool xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner& pm, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false);