From 4a3fb692430ba602a75a144d6eae8cd6d83d5d0c Mon Sep 17 00:00:00 2001 From: Ted <jhuhong-jheng@kwai.com> Date: Thu, 6 Feb 2020 23:50:44 +0800 Subject: [PATCH] JVET-Q0493: Palette encoder improvements for lossless coding Details: - Use the correct distortion precision, based on the input bit depth, when deriving the palette for lossless coding. - Skip the unnecessary distortion calculation in the palette coding RD decision for lossless coding. --- source/Lib/CommonLib/TypeDef.h | 2 + source/Lib/EncoderLib/IntraSearch.cpp | 161 ++++++++++++++++++++++++++ source/Lib/EncoderLib/IntraSearch.h | 34 ++++++ 3 files changed, 197 insertions(+) diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 209f4f6a4..fefa0ba8e 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,6 +50,8 @@ #include <assert.h> #include <cassert> +#define JVET_Q0493_PLT_ENCODER_LOSSLESS 1 // JVET-Q0493: Palette encoder improvements for lossless coding + #define JVET_Q0629_REMOVAL_PLT_4X4 1 // JVET-Q0629: Removal of 4x4 blocks in palette mode #define JVET_Q0291_REDUCE_DUALTREE_PLT_SIZE 1 // JVET-Q0291: reduce palette size of dual tree from 31 to 15 and palette predictor size of dual tree from 63 to 31 diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 617856537..9cdc5d8c7 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -1791,6 +1791,9 @@ void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitione CodingUnit &cu = *cs.getCU(partitioner.chType); uint32_t height = cu.block(compBegin).height; uint32_t width = cu.block(compBegin).width; +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING); +#endif CPelBuf orgBuf[3]; for (int comp = compBegin; comp < (compBegin + numComp); comp++) @@ -1827,9 +1830,33 @@ void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& 
partitione uint8_t pltIdx = 0; double minError = MAX_DOUBLE; uint8_t bestIdx = 0; +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + for (uint8_t z = 0; z < cu.curPLTSize[compBegin]; z++) + { + m_indexError[z][rasPos] = minError; + } +#endif while (pltIdx < cu.curPLTSize[compBegin]) { uint64_t sqrtError = 0; +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + if (lossless) + { + for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++) + { + sqrtError += int64_t(abs(curPel[comp] - cu.curPLT[comp][pltIdx])); + } + if (sqrtError == 0) + { + m_indexError[pltIdx][rasPos] = (double) sqrtError; + minError = (double) sqrtError; + bestIdx = pltIdx; + break; + } + } + else + { +#endif for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++) { int64_t tmpErr = int64_t(curPel[comp] - cu.curPLT[comp][pltIdx]); @@ -1848,14 +1875,32 @@ void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitione minError = (double)sqrtError; bestIdx = pltIdx; } +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } +#endif pltIdx++; } Pel paPixelValue[3], paRecoValue[3]; +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + if (!lossless) + { +#endif calcPixelPredRD(cs, partitioner, curPel, paPixelValue, paRecoValue, compBegin, numComp); +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } +#endif uint64_t error = 0, rate = 0; for (int comp = compBegin; comp < (discardChroma ? 
1 : (compBegin + numComp)); comp++) { +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + if (lossless) + { + rate += m_escapeNumBins[curPel[comp]]; + } + else + { +#endif int64_t tmpErr = int64_t(curPel[comp] - paRecoValue[comp]); if (isChroma((ComponentID)comp)) { @@ -1866,6 +1911,9 @@ void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitione error += tmpErr*tmpErr; } rate += m_escapeNumBins[paPixelValue[comp]]; // encode quantized escape color +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } +#endif } double rdCost = (double)error + m_pcRdCost->getLambda()*(double)rate; m_indexError[cu.curPLTSize[compBegin]][rasPos] = rdCost; @@ -2291,6 +2339,9 @@ void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, u { CodingUnit &cu = *cs.getCU(partitioner.chType); TransformUnit &tu = *cs.getTU(partitioner.chType); +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING); +#endif CPelBuf orgBuf[3]; for (int comp = compBegin; comp < (compBegin + numComp); comp++) @@ -2314,6 +2365,10 @@ void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, u int rightShiftOffset[3]; int invquantiserRightShift[3]; int add[3]; +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + if (!lossless) + { +#endif for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++) { QpParam cQP(tu, ComponentID(ch)); @@ -2326,6 +2381,9 @@ void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, u invquantiserRightShift[ch] = IQUANT_SHIFT; add[ch] = 1 << (invquantiserRightShift[ch] - 1); } +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } +#endif uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc()); uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc()); @@ -2337,19 +2395,43 @@ void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, u PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)ch); if (compBegin != COMPONENT_Y || ch == 0) 
{ +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + if (lossless) + { + escapeValue.at(xPos, yPos) = orgBuf[ch].at(xPos, yPos); + recBuf.at(xPos, yPos) = escapeValue.at(xPos, yPos); + } + else + { +#endif escapeValue.at(xPos, yPos) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]))); assert(escapeValue.at(xPos, yPos) < (1 << (channelBitDepth + 1))); recBuf.at(xPos, yPos) = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch]; recBuf.at(xPos, yPos) = Pel(ClipBD<int>(recBuf.at(xPos, yPos), channelBitDepth));//to be checked +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } +#endif } else if (compBegin == COMPONENT_Y && ch > 0 && yPos % (1 << scaleY) == 0 && xPos % (1 << scaleX) == 0) { uint32_t yPosC = yPos >> scaleY; uint32_t xPosC = xPos >> scaleX; +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + if (lossless) + { + escapeValue.at(xPosC, yPosC) = orgBuf[ch].at(xPosC, yPosC); + recBuf.at(xPosC, yPosC) = escapeValue.at(xPosC, yPosC); + } + else + { +#endif escapeValue.at(xPosC, yPosC) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]))); assert(escapeValue.at(xPosC, yPosC) < (1 << (channelBitDepth + 1))); recBuf.at(xPosC, yPosC) = (((escapeValue.at(xPosC, yPosC)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch]; recBuf.at(xPosC, yPosC) = Pel(ClipBD<int>(recBuf.at(xPosC, yPosC), channelBitDepth));//to be checked +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } +#endif } } } @@ -2358,8 +2440,19 @@ void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, CodingUnit &cu = *cs.getCU(partitioner.chType); const int channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA); const int channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA); +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + bool lossless = (m_pcEncCfg->getCostMode() == 
COST_LOSSLESS_CODING); + int pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH); + int pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH); + if (lossless) + { + pcmShiftRight_L = 0; + pcmShiftRight_C = 0; + } +#else const int pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH); const int pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH); +#endif #if JVET_Q0291_REDUCE_DUALTREE_PLT_SIZE int maxPltSize = cu.isSepTree() ? MAXPLTSIZE_DUALTREE : MAXPLTSIZE; @@ -2390,6 +2483,12 @@ void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, int errorLimit = g_paletteQuant[qp]; #else int errorLimit = g_paletteQuant[cu.qp]; +#endif +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + if (lossless) + { + errorLimit = 0; + } #endif uint32_t totalSize = height*width; SortingElement *pelList = new SortingElement[totalSize]; @@ -2428,12 +2527,38 @@ void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, tmpCompBegin = COMPONENT_Y; tmpNumComp = 1; } +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + int besti = last, bestSAD = (last == -1) ? MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless); + if (lossless) + { + if (bestSAD) + { + for (int i = idx - 1; i >= 0; i--) + { + uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless); + if (sad == 0) + { + bestSAD = sad; + besti = i; + break; + } + } + } + } + else + { +#else int besti = last, bestSAD = (last == -1) ? 
MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp); +#endif if( bestSAD ) { for (int i = idx - 1; i >= 0; i--) { +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless); +#else uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp); +#endif if (sad < bestSAD) { bestSAD = sad; @@ -2442,7 +2567,12 @@ void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, } } } +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } + if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless)) +#else if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp)) +#endif { pelList[besti].addElement(element, tmpCompBegin, tmpNumComp); last = besti; @@ -2498,12 +2628,38 @@ void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, org[comp] = orgBuf[comp].at(pX, pY); } element.setAll(org, compBegin, numComp); +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + int besti = last, bestSAD = (last == -1) ? MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp, lossless); + if (lossless) + { + if (bestSAD) + { + for (int i = idx - 1; i >= 0; i--) + { + uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp, lossless); + if (sad == 0) + { + bestSAD = sad; + besti = i; + break; + } + } + } + } + else + { +#else int besti = last, bestSAD = (last == -1) ? 
MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp); +#endif if (bestSAD) { for (int i = idx - 1; i >= 0; i--) { +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp, lossless); +#else uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp); +#endif if (sad < bestSAD) { bestSAD = sad; @@ -2512,7 +2668,12 @@ void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, } } } +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } + if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), compBegin, numComp, lossless)) +#else if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), compBegin, numComp)) +#endif { pelList[besti].addElement(element, compBegin, numComp); last = besti; diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h index da19e57f2..190eff53b 100644 --- a/source/Lib/EncoderLib/IntraSearch.h +++ b/source/Lib/EncoderLib/IntraSearch.h @@ -112,11 +112,27 @@ public: data[ch] = ui[ch]; } } +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + bool almostEqualData(SortingElement element, int errorLimit, const BitDepths& bitDepths, ComponentID compBegin, uint32_t numComp, bool lossless) +#else bool almostEqualData(SortingElement element, int errorLimit, const BitDepths& bitDepths, ComponentID compBegin, uint32_t numComp) +#endif { bool almostEqual = true; for (int comp = compBegin; comp < (compBegin + numComp); comp++) { +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + if (lossless) + { + if ((std::abs(data[comp] - element.data[comp])) > errorLimit) + { + almostEqual = false; + break; + } + } + else + { +#endif uint32_t absError = 0; if (isChroma((ComponentID) comp)) { @@ -131,16 +147,34 @@ public: almostEqual = false; break; } +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } +#endif } return almostEqual; } +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + 
uint32_t getSAD(SortingElement element, const BitDepths &bitDepths, ComponentID compBegin, uint32_t numComp, bool lossless) +#else uint32_t getSAD(SortingElement element, const BitDepths& bitDepths, ComponentID compBegin, uint32_t numComp) +#endif { uint32_t sumAd = 0; for (int comp = compBegin; comp < (compBegin + numComp); comp++) { ChannelType chType = (comp > 0) ? CHANNEL_TYPE_CHROMA : CHANNEL_TYPE_LUMA; +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + if (lossless) + { + sumAd += (std::abs(data[comp] - element.data[comp])); + } + else + { +#endif sumAd += (std::abs(data[comp] - element.data[comp]) >> (bitDepths.recon[chType] - PLT_ENCBITDEPTH)); +#if JVET_Q0493_PLT_ENCODER_LOSSLESS + } +#endif } return sumAd; } -- GitLab