Newer
Older
if (indexMaxSize <= 1)
{
cu.useRotation[compBegin] = false;
}
//reconstruct pixel
PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin);
for (uint32_t y = 0; y < height; y++)
for (uint32_t x = 0; x < width; x++)
curPLTIdx.at(x, y) = idxMapping[curPLTIdx.at(x, y)];
if (curPLTIdx.at(x, y) == cu.curPLTSize[compBegin])
calcPixelPred(cs, partitioner, y, x, compBegin, numComp);
}
else
{
for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++)
{
CompArea area = cu.blocks[compID];
PelBuf recBuf = cs.getRecoBuf(area);
uint32_t scaleX = getComponentScaleX((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
if (compBegin != COMPONENT_Y || compID == COMPONENT_Y)
{
recBuf.at(x, y) = cu.curPLT[compID][curPLTIdx.at(x, y)];
else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0)
recBuf.at(x >> scaleX, y >> scaleY) = cu.curPLT[compID][curPLTIdx.at(x, y)];
}
}
}
}
}
cs.getPredBuf().fill(0);
cs.getResiBuf().fill(0);
cs.getOrgResiBuf().fill(0);
cs.fracBits = MAX_UINT;
cs.cost = MAX_DOUBLE;
Distortion distortion = 0;
for (uint32_t comp = compBegin; comp < (compBegin + numComp); comp++)
{
const ComponentID compID = ComponentID(comp);
CPelBuf reco = cs.getRecoBuf(compID);
CPelBuf org = cs.getOrgBuf(compID);
Yung-Hsuan Chao (Jessie)
committed
#if WCG_EXT
if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
{
const CPelBuf orgLuma = cs.getOrgBuf(cs.area.blocks[COMPONENT_Y]);
if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
{
const CompArea &areaY = cu.Y();
CompArea tmpArea1(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
}
else
{
distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
}
}
else
Yung-Hsuan Chao (Jessie)
committed
#endif
distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE);
Yung-Hsuan Chao (Jessie)
committed
cs.dist += distortion;
const CompArea &area = cu.blocks[compBegin];
cs.setDecomp(area);
#if JVET_Z0118_GDR
cs.updateReconMotIPM(area);
#else
cs.picture->getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
Yung-Hsuan Chao (Jessie)
committed
}
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
// Quantises and reconstructs one escape-coded sample per component for RD estimation.
//
// For every component in [compBegin, compBegin + numComp):
//   - paPixelValue[c] receives the quantised level of orgBuf[c] (clamped at zero),
//   - paRecoValue[c] receives the de-quantised value clipped to the component bit depth.
// The QP is taken from the transform unit via QpParam::Qp(true).
// All three pointer arguments are indexed by component id, so they must be valid
// for every index in the processed range.
void IntraSearch::calcPixelPredRD(CodingStructure& cs, Partitioner& partitioner, Pel* orgBuf, Pel* paPixelValue, Pel* paRecoValue, ComponentID compBegin, uint32_t numComp)
{
  CodingUnit    &codingUnit = *cs.getCU(partitioner.chType);
  TransformUnit &transUnit  = *cs.getTU(partitioner.chType);

  const uint32_t compEnd = compBegin + numComp;
  for (uint32_t c = compBegin; c < compEnd; c++)
  {
    // Per-component quantiser parameters derived from the QP.
    QpParam   qpParam(transUnit, ComponentID(c));
    const int compQp        = qpParam.Qp(true);
    const int rem           = compQp % 6;
    const int per           = compQp / 6;
    const int fwdScale      = g_quantScales[0][rem];
    const int fwdShift      = QUANT_SHIFT + per;
    const int fwdRounding   = 1 << (fwdShift - 1);
    const int invShift      = IQUANT_SHIFT;
    const int invRounding   = 1 << (invShift - 1);
    const int bitDepth      = codingUnit.cs->sps->getBitDepth(toChannelType((ComponentID) c));

    // Forward quantisation; negative results are clamped to zero.
    const int level = (orgBuf[c] * fwdScale + fwdRounding) >> fwdShift;
    paPixelValue[c] = Pel(std::max<int>(0, level));
    assert(paPixelValue[c] < (1 << (bitDepth + 1)));

    // Inverse quantisation; the value is stored as Pel first and clipped afterwards.
    paRecoValue[c] = (((paPixelValue[c] * g_invQuantScales[0][rem]) << per) + invRounding) >> invShift;
    paRecoValue[c] = Pel(ClipBD<int>(paRecoValue[c], bitDepth));   // to be checked
  }
}
// Pre-computes, for every sample of the block, the error of mapping it to each
// palette entry and the RD cost of coding it as an escape sample.
//
// Results are written to:
//   - m_indexError[idx][rasPos]                    : error of palette entry idx at raster position rasPos,
//   - m_indexError[cu.curPLTSize[compBegin]][rasPos]: RD cost of the escape alternative,
//   - m_minErrorIndexMap[rasPos]                   : index with the smallest cost (may be the escape index).
//
// compBegin / numComp select the processed component range.
void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
{
  CodingUnit &cu = *cs.getCU(partitioner.chType);
  uint32_t height = cu.block(compBegin).height;
  uint32_t width = cu.block(compBegin).width;
  bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());

  CPelBuf orgBuf[3];
  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
  {
    CompArea area = cu.blocks[comp];
    // NOTE(review): with LMCS active the source samples are read from the prediction
    // buffer - presumably the caller stored the reshaped original there; confirm.
    if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
    {
      orgBuf[comp] = cs.getPredBuf(area);
    }
    else
    {
      orgBuf[comp] = cs.getOrgBuf(area);
    }
  }

  int rasPos;
  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
  for (uint32_t y = 0; y < height; y++)
  {
    for (uint32_t x = 0; x < width; x++)
    {
      rasPos = y * width + x;
      // chroma discard: only the first component contributes at positions flagged here
      bool discardChroma = (compBegin == COMPONENT_Y) && (y&scaleY || x&scaleX);
      Pel curPel[3];
      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
      {
        // Chroma is addressed at sub-sampled coordinates when the block starts at luma.
        uint32_t pX1 = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
        uint32_t pY1 = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
        curPel[comp] = orgBuf[comp].at(pX1, pY1);
      }

      uint8_t pltIdx = 0;
      double minError = MAX_DOUBLE;
      uint8_t bestIdx = 0;
      for (uint8_t z = 0; z < cu.curPLTSize[compBegin]; z++)
      {
        m_indexError[z][rasPos] = minError;
      }
      while (pltIdx < cu.curPLTSize[compBegin])
      {
        uint64_t sqrtError = 0;
        if (lossless)
        {
          // Lossless: only an exact match is accepted; the search stops at the first one.
          for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
          {
            sqrtError += int64_t(abs(curPel[comp] - cu.curPLT[comp][pltIdx]));
          }
          if (sqrtError == 0)
          {
            m_indexError[pltIdx][rasPos] = (double) sqrtError;
            minError = (double) sqrtError;
            bestIdx = pltIdx;
            break;
          }
        }
        else
        {
          // Lossy: squared error, with chroma components weighted by ENC_CHROMA_WEIGHTING.
          for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
          {
            int64_t tmpErr = int64_t(curPel[comp] - cu.curPLT[comp][pltIdx]);
            if (isChroma((ComponentID) comp))
            {
              sqrtError += uint64_t(tmpErr * tmpErr * ENC_CHROMA_WEIGHTING);
            }
            else
            {
              sqrtError += tmpErr * tmpErr;
            }
          }
          m_indexError[pltIdx][rasPos] = (double) sqrtError;
          if (sqrtError < minError)
          {
            minError = (double) sqrtError;
            bestIdx = pltIdx;
          }
        }
        pltIdx++;
      }

      // Escape alternative: quantised reconstruction error plus lambda-weighted bin count.
      Pel paPixelValue[3], paRecoValue[3];
      if (!lossless)
      {
        calcPixelPredRD(cs, partitioner, curPel, paPixelValue, paRecoValue, compBegin, numComp);
      }
      uint64_t error = 0, rate = 0;
      for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
      {
        if (lossless)
        {
          rate += m_escapeNumBins[curPel[comp]];
        }
        else
        {
          int64_t tmpErr = int64_t(curPel[comp] - paRecoValue[comp]);
          if (isChroma((ComponentID) comp))
          {
            error += uint64_t(tmpErr * tmpErr * ENC_CHROMA_WEIGHTING);
          }
          else
          {
            error += tmpErr * tmpErr;
          }
          rate += m_escapeNumBins[paPixelValue[comp]]; // encode quantized escape color
        }
      }
      double rdCost = (double)error + m_pcRdCost->getLambda()*(double)rate;
      m_indexError[cu.curPLTSize[compBegin]][rasPos] = rdCost;
      if (rdCost < minError)
      {
        minError = rdCost;
        bestIdx = (uint8_t)cu.curPLTSize[compBegin];
      }
      m_minErrorIndexMap[rasPos] = bestIdx; // save the optimal index of the current pixel
    }
  }
}
// Derives the palette index map and run types of the block for one scan mode
// using a three-state trellis (0: same as previous sample, 1: copy-above, 2: index).
//
// The trellis is evaluated per coefficient group by deriveSubblockIndexMap(); if that
// call reports early termination the function returns without touching the outputs.
// Otherwise the best path is traced back and, when its cost is lower than dMinCost,
// the following are updated:
//   - tu PLT index / run-type buffers of compBegin,
//   - cu.useEscape[compBegin], m_bestEscape, m_bestScanRotationMode,
//   - idxExist[] (reset, then set for every index used; MAXPLTSIZE + 1 entries),
//   - dMinCost (set to the cost of this scan mode).
//
// numComp is not used inside this function.
void IntraSearch::deriveIndexMap(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, double& dMinCost, bool* idxExist)
{
  CodingUnit    &cu = *cs.getCU(partitioner.chType);
  TransformUnit &tu = *cs.getTU(partitioner.chType);
  uint32_t      height = cu.block(compBegin).height;
  uint32_t      width = cu.block(compBegin).width;

  int   total = height*width;
  Pel  *runIndex = tu.getPLTIndex(compBegin);
  bool *runType = tu.getRunTypes(compBegin);
  m_scanOrder = g_scanOrder[SCAN_UNGROUPED][pltScanMode ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];

  // Trellis initialization
  // Each table is cleared with its own element size. The run-type table used to be
  // sized with sizeof(Pel), which is only right if its elements are Pel-sized.
  for (int i = 0; i < 2; i++)
  {
    memset(m_prevRunTypeRDOQ[i], 0, sizeof(m_prevRunTypeRDOQ[i][0])*NUM_TRELLIS_STATE);
    memset(m_prevRunPosRDOQ[i],  0, sizeof(m_prevRunPosRDOQ[i][0])*NUM_TRELLIS_STATE);
    memset(m_stateCostRDOQ[i],   0, sizeof(m_stateCostRDOQ[i][0])*NUM_TRELLIS_STATE);
  }
  for (int state = 0; state < NUM_TRELLIS_STATE; state++)
  {
    m_statePtRDOQ[state][0] = 0;
  }

  // Context modeling
  const FracBitsAccess& fracBits = m_CABACEstimator->getCtx().getFracBitsAcess();
  BinFracBits fracBitsPltCopyFlagIndex[RUN_IDX_THRE + 1];
  for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
  {
    const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_INDEX, dist);
    fracBitsPltCopyFlagIndex[dist] = fracBits.getFracBitsArray(Ctx::IdxRunModel( ctxId ) );
  }
  BinFracBits fracBitsPltCopyFlagAbove[RUN_IDX_THRE + 1];
  for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
  {
    const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_COPY, dist);
    fracBitsPltCopyFlagAbove[dist] = fracBits.getFracBitsArray(Ctx::CopyRunModel( ctxId ) );
  }
  const BinFracBits fracBitsPltRunType = fracBits.getFracBitsArray( Ctx::RunTypeFlag() );

  // Trellis RDO per CG
  bool contTrellisRD = true;
  for (int subSetId = 0; ( subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE ) && contTrellisRD; subSetId++)
  {
    int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE;
    int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE);
    maxSubPos = (maxSubPos > total) ? total : maxSubPos; // if last position is out of the current CU size
    contTrellisRD = deriveSubblockIndexMap(cs, partitioner, compBegin, pltScanMode, minSubPos, maxSubPos, fracBitsPltRunType, fracBitsPltCopyFlagIndex, fracBitsPltCopyFlagAbove, dMinCost, (bool)pltScanMode);
  }
  if (!contTrellisRD)
  {
    return;
  }

  // best state at the last scan position
  double  sumRdCost = MAX_DOUBLE;
  uint8_t bestState = 0;
  for (uint8_t state = 0; state < NUM_TRELLIS_STATE; state++)
  {
    if (m_stateCostRDOQ[0][state] < sumRdCost)
    {
      sumRdCost = m_stateCostRDOQ[0][state];
      bestState = state;
    }
  }

  bool    checkRunTable  [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
  uint8_t checkIndexTable[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
  uint8_t bestStateTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
  uint8_t nextState = bestState;

  // best trellis path (traced back from the last scan position)
  for (int i = total - 1; i >= 0; i--)
  {
    bestStateTable[i] = nextState;
    int rasterPos = m_scanOrder[i].idx;
    nextState = m_statePtRDOQ[nextState][rasterPos];
  }

  // reconstruct index and runs based on the state pointers
  // NOTE(review): state 0 at i == 0 would read scan position -1; the sub-block trellis
  // marks that state as unavailable at position 0, so it is not expected on the best path.
  for (int i = 0; i < total; i++)
  {
    int rasterPos = m_scanOrder[i].idx;
    int abovePos = (pltScanMode == PLT_SCAN_HORTRAV) ? m_scanOrder[i].idx - width : m_scanOrder[i].idx - 1;
    nextState = bestStateTable[i];
    if ( nextState == 0 ) // same as the previous
    {
      checkRunTable[rasterPos] = checkRunTable[ m_scanOrder[i - 1].idx ];
      if ( checkRunTable[rasterPos] == PLT_RUN_INDEX )
      {
        checkIndexTable[rasterPos] = checkIndexTable[m_scanOrder[i - 1].idx];
      }
      else
      {
        checkIndexTable[rasterPos] = checkIndexTable[ abovePos ];
      }
    }
    else if (nextState == 1) // CopyAbove mode
    {
      checkRunTable[rasterPos] = PLT_RUN_COPY;
      checkIndexTable[rasterPos] = checkIndexTable[abovePos];
    }
    else if (nextState == 2) // Index mode
    {
      checkRunTable[rasterPos] = PLT_RUN_INDEX;
      checkIndexTable[rasterPos] = m_minErrorIndexMap[rasterPos];
    }
  }

  // Escape flag: set when any sample uses the index equal to the palette size
  m_bestEscape = false;
  for (int pos = 0; pos < total; pos++)
  {
    uint8_t index = checkIndexTable[pos];
    if (index == cu.curPLTSize[compBegin])
    {
      m_bestEscape = true;
      break;
    }
  }

  // Horizontal scan v.s vertical scan
  if (sumRdCost < dMinCost)
  {
    cu.useEscape[compBegin] = m_bestEscape;
    m_bestScanRotationMode = pltScanMode;
    memset(idxExist, false, sizeof(bool) * (MAXPLTSIZE + 1));
    for (int pos = 0; pos < total; pos++)
    {
      runIndex[pos] = checkIndexTable[pos];
      runType[pos] = checkRunTable[pos];
      idxExist[checkIndexTable[pos]] = true;
    }
    dMinCost = sumRdCost;
  }
}
// Runs the palette trellis over the scan positions [minSubPos, maxSubPos).
//
// For every position and every current state (0: same as previous sample,
// 1: copy-above, 2: index) the cheapest predecessor state is selected using
// rateDistOptPLT(), and the per-state cost, last coded run type/position, state
// pointer and rolling index/run maps are updated. The rolling maps hold
// 2 * width (horizontal traverse) or 2 * height (vertical traverse) entries and
// are addressed modulo that size.
//
// fracBitsPltIndexINDEX / fracBitsPltIndexCOPY are the bit-cost tables selected
// according to the run type coded last on the predecessor path.
// When useRotate is set, the search stops as soon as all three state costs have
// reached minCost.
//
// Returns false on that early termination, true otherwise.
bool IntraSearch::deriveSubblockIndexMap(
  CodingStructure& cs,
  Partitioner&  partitioner,
  ComponentID   compBegin,
  PLTScanMode   pltScanMode,
  int           minSubPos,
  int           maxSubPos,
  const BinFracBits& fracBitsPltRunType,
  const BinFracBits* fracBitsPltIndexINDEX,
  const BinFracBits* fracBitsPltIndexCOPY,
  const double minCost,
  bool         useRotate
)
{
  CodingUnit &cu = *cs.getCU(partitioner.chType);
  uint32_t height = cu.block(compBegin).height;
  uint32_t width = cu.block(compBegin).width;
  int indexMaxValue = cu.curPLTSize[compBegin];

  int refId = 0;   // selects which of the two ping-pong buffers holds the previous position
  int currRasterPos, currScanPos, prevScanPos, aboveScanPos, roffset;
  int log2Width = (pltScanMode == PLT_SCAN_HORTRAV) ? floorLog2(width): floorLog2(height);
  int buffersize = (pltScanMode == PLT_SCAN_HORTRAV) ? 2*width: 2*height;
  for (int curPos = minSubPos; curPos < maxSubPos; curPos++)
  {
    currRasterPos = m_scanOrder[curPos].idx;
    prevScanPos = (curPos == 0) ? 0 : (curPos - 1) % buffersize;
    roffset = (curPos >> log2Width) << log2Width;
    aboveScanPos = roffset - (curPos - roffset + 1);
    aboveScanPos %= buffersize;
    currScanPos = curPos % buffersize;
    if ((pltScanMode == PLT_SCAN_HORTRAV && curPos < width) || (pltScanMode == PLT_SCAN_VERTRAV && curPos < height))
    {
      aboveScanPos = -1; // first column/row: above row is not valid
    }

    // 1st state: same as previous scanned sample
    // 2nd state: Copy_Above mode
    // 3rd state: Index mode
    // Loop of current state
    for ( int curState = 0; curState < NUM_TRELLIS_STATE; curState++ )
    {
      double    minRdCost          = MAX_DOUBLE;
      int       minState           = 0; // best prevState
      uint8_t   bestRunIndex       = 0;
      bool      bestRunType        = 0;
      bool      bestPrevCodedType  = 0;
      int       bestPrevCodedPos   = 0;
      if ( ( curState == 0 && curPos == 0 ) || ( curState == 1 && aboveScanPos < 0 ) ) // state not available
      {
        m_stateCostRDOQ[1 - refId][curState] = MAX_DOUBLE;
        continue;
      }

      bool    runType  = 0;
      uint8_t runIndex = 0;
      if ( curState == 1 ) // 2nd state: Copy_Above mode
      {
        runType = PLT_RUN_COPY;
      }
      else if ( curState == 2 ) // 3rd state: Index mode
      {
        runType = PLT_RUN_INDEX;
        runIndex = m_minErrorIndexMap[currRasterPos];
      }

      // Loop of previous state
      for ( int stateID = 0; stateID < NUM_TRELLIS_STATE; stateID++ )
      {
        if ( m_stateCostRDOQ[refId][stateID] == MAX_DOUBLE )
        {
          continue;
        }
        if ( curState == 0 ) // 1st state: same as previous scanned sample
        {
          runType = m_runMapRDOQ[refId][stateID][prevScanPos];
          runIndex = ( runType == PLT_RUN_INDEX ) ? m_indexMapRDOQ[refId][stateID][ prevScanPos ] : m_indexMapRDOQ[refId][stateID][ aboveScanPos ];
        }
        else if ( curState == 1 ) // 2nd state: Copy_Above mode
        {
          runIndex = m_indexMapRDOQ[refId][stateID][aboveScanPos];
        }
        bool    prevRunType   = m_runMapRDOQ[refId][stateID][prevScanPos];
        uint8_t prevRunIndex  = m_indexMapRDOQ[refId][stateID][prevScanPos];
        uint8_t aboveRunIndex = (aboveScanPos >= 0) ? m_indexMapRDOQ[refId][stateID][aboveScanPos] : 0;
        int     dist = curPos - m_prevRunPosRDOQ[refId][stateID] - 1;
        double  rdCost = m_stateCostRDOQ[refId][stateID];
        // The predecessor cost alone already exceeds the best total found so far.
        if ( rdCost >= minRdCost ) continue;

        // Local copies: rateDistOptPLT() updates them through its reference parameters.
        bool prevCodedRunType = m_prevRunTypeRDOQ[refId][stateID];
        int  prevCodedPos     = m_prevRunPosRDOQ [refId][stateID];
        const BinFracBits* fracBitsPt = (m_prevRunTypeRDOQ[refId][stateID] == PLT_RUN_INDEX) ? fracBitsPltIndexINDEX : fracBitsPltIndexCOPY;
        rdCost += rateDistOptPLT(runType, runIndex, prevRunType, prevRunIndex, aboveRunIndex, prevCodedRunType, prevCodedPos, curPos, (pltScanMode == PLT_SCAN_HORTRAV) ? width : height, dist, indexMaxValue, fracBitsPt, fracBitsPltRunType);
        if (rdCost < minRdCost) // update minState ( minRdCost )
        {
          minRdCost    = rdCost;
          minState     = stateID;
          bestRunType  = runType;
          bestRunIndex = runIndex;
          bestPrevCodedType = prevCodedRunType;
          bestPrevCodedPos  = prevCodedPos;
        }
      }

      // Update trellis info of current state
      m_stateCostRDOQ  [1 - refId][curState]  = minRdCost;
      m_prevRunTypeRDOQ[1 - refId][curState]  = bestPrevCodedType;
      m_prevRunPosRDOQ [1 - refId][curState]  = bestPrevCodedPos;
      m_statePtRDOQ[curState][currRasterPos]  = minState;
      // Carry over the history of the chosen predecessor, then append the current sample.
      int buffer2update = std::min(buffersize, curPos);
      memcpy(m_indexMapRDOQ[1 - refId][curState], m_indexMapRDOQ[refId][minState], sizeof(uint8_t)*buffer2update);
      memcpy(m_runMapRDOQ[1 - refId][curState], m_runMapRDOQ[refId][minState], sizeof(bool)*buffer2update);
      m_indexMapRDOQ[1 - refId][curState][currScanPos] = bestRunIndex;
      m_runMapRDOQ  [1 - refId][curState][currScanPos] = bestRunType;
    }

    if (useRotate) // early terminate: Rd cost >= min cost in horizontal scan
    {
      if ((m_stateCostRDOQ[1 - refId][0] >= minCost) &&
         (m_stateCostRDOQ[1 - refId][1] >= minCost) &&
         (m_stateCostRDOQ[1 - refId][2] >= minCost) )
      {
        return false;
      }
    }
    refId = 1 - refId;
  }
  return true;
}
// Returns the RD cost increment of coding the sample at scanPos with the given
// run type / index, given the run type / index of the previous and above samples.
//
// Cost terms:
//   - index bits (m_truncBinBits) when a new index has to be signalled or at scanPos 0,
//   - the pre-computed error m_indexError[runIndex][raster position],
//   - the copy/identity flag bits from IndexfracBits (context chosen by dist, capped at RUN_IDX_THRE),
//   - the run-type flag bits from TypefracBits when the run type has to be signalled.
// MAX_DOUBLE is returned when the index equals the reference index it would be
// coded against, i.e. the combination is rejected.
//
// prevCodedRunType / prevCodedPos are in-out: they are overwritten with the current
// run type / position whenever the sample is not identical to the previous one
// (or at scanPos 0).
double IntraSearch::rateDistOptPLT(
  bool      runType,
  uint8_t   runIndex,
  bool      prevRunType,
  uint8_t   prevRunIndex,
  uint8_t   aboveRunIndex,
  bool&     prevCodedRunType,
  int&      prevCodedPos,
  int       scanPos,
  uint32_t  width,
  int       dist,
  int       indexMaxValue,
  const BinFracBits* IndexfracBits,
  const BinFracBits& TypefracBits)
{
  double rdCost = 0.0;
  // True when the sample continues the previous run unchanged.
  bool identityFlag = !( (runType != prevRunType) || ( (runType == PLT_RUN_INDEX) && (runIndex != prevRunIndex) ) );

  if ( ( !identityFlag && runType == PLT_RUN_INDEX ) || scanPos == 0 ) // encode index value
  {
    uint8_t refIndex = (prevRunType == PLT_RUN_INDEX) ? prevRunIndex : aboveRunIndex;
    refIndex = (scanPos == 0) ? ( indexMaxValue + 1) : refIndex;
    if ( runIndex == refIndex )
    {
      rdCost = MAX_DOUBLE;
      return rdCost;
    }
    rdCost += m_pcRdCost->getLambda()*(m_truncBinBits[(runIndex > refIndex) ? runIndex - 1 : runIndex][(scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue] << SCALE_BITS);
  }
  // The error applies to every run type, so it is added outside the index branch.
  rdCost += m_indexError[runIndex][m_scanOrder[scanPos].idx] * (1 << SCALE_BITS);
  if (scanPos > 0)
  {
    rdCost += m_pcRdCost->getLambda()*( identityFlag ? (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[1]) : (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[0] ) );
  }
  if ( !identityFlag && scanPos >= width && prevRunType != PLT_RUN_COPY )
  {
    rdCost += m_pcRdCost->getLambda()*TypefracBits.intBits[runType];
  }
  if (!identityFlag || scanPos == 0)
  {
    prevCodedRunType = runType;
    prevCodedPos = scanPos;
  }
  return rdCost;
}
// Returns the number of bins needed to code `symbol` with an escalating
// (exp-Golomb style) binarisation that starts at order `count`:
// one bin per escalation step, one terminating bin, plus the final order in suffix bins.
// The result is asserted to be at most 32.
uint32_t IntraSearch::getEpExGolombNumBins(uint32_t symbol, uint32_t count)
{
  uint32_t numBins = 0;
  // The shift is done on an unsigned operand and never by 32 or more, so very large
  // symbols no longer hit undefined behaviour when the assert below is compiled out.
  while (count < 32 && symbol >= (uint32_t(1) << count))
  {
    numBins++;
    symbol -= uint32_t(1) << count;
    count++;
  }
  numBins++;
  numBins += count;
  assert(numBins <= 32);
  return numBins;
}
// Returns the code length of `symbol` under a truncated binary code over
// `maxSymbol` values: either `k` or `k + 1` bits, where 2^k is the largest power
// of two not exceeding maxSymbol. For maxSymbol <= 256 the value of k is read
// from g_tbMax; larger alphabets compute it by doubling from 2^8.
// Preconditions (asserted): symbol < maxSymbol and 2^k <= maxSymbol < 2^(k+1).
uint32_t IntraSearch::getTruncBinBits(uint32_t symbol, uint32_t maxSymbol)
{
  uint32_t shortLen;
  if (maxSymbol <= 256)
  {
    shortLen = g_tbMax[maxSymbol];
  }
  else
  {
    shortLen = 8;
    for (uint32_t pow2 = 1 << 8; pow2 <= maxSymbol; pow2 <<= 1)
    {
      shortLen++;
    }
    shortLen--;
  }

  const uint32_t lowPow2 = 1 << shortLen;
  assert(lowPow2 <= maxSymbol);
  assert((lowPow2 << 1) > maxSymbol);
  assert(symbol < maxSymbol);

  const uint32_t excess = maxSymbol - lowPow2;
  assert(excess < lowPow2);

  // The first (lowPow2 - excess) symbols get the short code, the rest one bit more.
  return (symbol < lowPow2 - excess) ? shortLen : shortLen + 1;
}
// Fills the bit-count lookup tables used by the palette RD search:
//   - m_truncBinBits[j][i] = getTruncBinBits(j, i) for every j < i <= m_symbolSize,
//   - m_escapeNumBins[i]   = getEpExGolombNumBins(i, 5) for every i < m_symbolSize.
// Both tables are zeroed first. The bitDepth argument is not used in this function.
void IntraSearch::initTBCTable(int bitDepth)
{
  for (uint32_t i = 0; i < m_symbolSize; i++)
  {
    memset(m_truncBinBits[i], 0, sizeof(uint16_t)*(m_symbolSize + 1));
  }
  for (uint32_t i = 0; i < (m_symbolSize + 1); i++)
  {
    for (uint32_t j = 0; j < i; j++)
    {
      m_truncBinBits[j][i] = getTruncBinBits(j, i);
    }
  }
  memset(m_escapeNumBins, 0, sizeof(uint16_t)*m_symbolSize);
  for (uint32_t i = 0; i < m_symbolSize; i++)
  {
    m_escapeNumBins[i] = getEpExGolombNumBins(i, 5);
  }
}
// Codes the sample at (xPos, yPos) as an escape sample: for every component in
// [compBegin, compBegin + numComp) the escape value is stored in the transform
// unit's escape buffer and the reconstruction buffer is updated.
//
// Lossless: the source sample is copied unchanged.
// Lossy:    the source sample is quantised with the TU QP (QpParam::Qp(true)),
//           de-quantised and clipped to the component bit depth.
//
// When the block starts at luma, chroma components are only written at positions
// aligned to the chroma sub-sampling grid, using the sub-sampled coordinates.
void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp)
{
  CodingUnit    &cu = *cs.getCU(partitioner.chType);
  TransformUnit &tu = *cs.getTU(partitioner.chType);
  bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());

  CPelBuf orgBuf[3];
  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
  {
    CompArea area = cu.blocks[comp];
    // NOTE(review): with LMCS active the source samples are read from the prediction
    // buffer - presumably the caller stored the reshaped original there; confirm.
    if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
    {
      orgBuf[comp] = cs.getPredBuf(area);
    }
    else
    {
      orgBuf[comp] = cs.getOrgBuf(area);
    }
  }

  // Quantiser parameters per component; only filled in (and only read) in lossy mode.
  int qp[3];
  int qpRem[3];
  int qpPer[3];
  int quantiserScale[3];
  int quantiserRightShift[3];
  int rightShiftOffset[3];
  int invquantiserRightShift[3];
  int add[3];
  if (!lossless)
  {
    for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
    {
      QpParam cQP(tu, ComponentID(ch));
      qp[ch] = cQP.Qp(true);
      qpRem[ch] = qp[ch] % 6;
      qpPer[ch] = qp[ch] / 6;
      quantiserScale[ch] = g_quantScales[0][qpRem[ch]];
      quantiserRightShift[ch] = QUANT_SHIFT + qpPer[ch];
      rightShiftOffset[ch] = 1 << (quantiserRightShift[ch] - 1);
      invquantiserRightShift[ch] = IQUANT_SHIFT;
      add[ch] = 1 << (invquantiserRightShift[ch] - 1);
    }
  }

  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
  for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
  {
    const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)ch));
    CompArea area = cu.blocks[ch];
    PelBuf recBuf = cs.getRecoBuf(area);
    PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)ch);
    if (compBegin != COMPONENT_Y || ch == 0)
    {
      // Component addressed at the block's own resolution.
      if (lossless)
      {
        escapeValue.at(xPos, yPos) = orgBuf[ch].at(xPos, yPos);
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
        recBuf.at(xPos, yPos) = orgBuf[ch].at(xPos, yPos);
#else
        recBuf.at(xPos, yPos) = escapeValue.at(xPos, yPos);
#endif
      }
      else
      {
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
        escapeValue.at(xPos, yPos) = std::max<TCoeff>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]));
        assert(escapeValue.at(xPos, yPos) < (TCoeff(1) << (channelBitDepth + 1)));
        TCoeff value = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
        recBuf.at(xPos, yPos) = Pel(ClipBD<TCoeff>(value, channelBitDepth));//to be checked
#else
        escapeValue.at(xPos, yPos) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
        assert(escapeValue.at(xPos, yPos) < (1 << (channelBitDepth + 1)));
        recBuf.at(xPos, yPos) = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
        recBuf.at(xPos, yPos) = Pel(ClipBD<int>(recBuf.at(xPos, yPos), channelBitDepth));//to be checked
#endif
      }
    }
    else if (compBegin == COMPONENT_Y && ch > 0 && yPos % (1 << scaleY) == 0 && xPos % (1 << scaleX) == 0)
    {
      // Chroma of a joint luma/chroma block: written once per chroma sample.
      uint32_t yPosC = yPos >> scaleY;
      uint32_t xPosC = xPos >> scaleX;
      if (lossless)
      {
        escapeValue.at(xPosC, yPosC) = orgBuf[ch].at(xPosC, yPosC);
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
        recBuf.at(xPosC, yPosC) = orgBuf[ch].at(xPosC, yPosC);
#else
        recBuf.at(xPosC, yPosC) = escapeValue.at(xPosC, yPosC);
#endif
      }
      else
      {
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
        escapeValue.at(xPosC, yPosC) = std::max<TCoeff>(
          0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]));
        assert(escapeValue.at(xPosC, yPosC) < (TCoeff(1) << (channelBitDepth + 1)));
        TCoeff value = (((escapeValue.at(xPosC, yPosC) * g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch])
                       >> invquantiserRightShift[ch];
        recBuf.at(xPosC, yPosC) = Pel(ClipBD<TCoeff>(value, channelBitDepth));   // to be checked
#else
        escapeValue.at(xPosC, yPosC) = TCoeff(std::max<int>(
          0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
        assert(escapeValue.at(xPosC, yPosC) < (1 << (channelBitDepth + 1)));
        recBuf.at(xPosC, yPosC) =
          (((escapeValue.at(xPosC, yPosC) * g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch])
          >> invquantiserRightShift[ch];
        recBuf.at(xPosC, yPosC) = Pel(ClipBD<int>(recBuf.at(xPosC, yPosC), channelBitDepth));   // to be checked
#endif
      }
    }
  }
}
void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
const int channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA);
const int channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA);
bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());
int pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH);
int pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH);
if (lossless)
{
pcmShiftRight_L = 0;
pcmShiftRight_C = 0;
}
int maxPltSize = cu.isSepTree() ? MAXPLTSIZE_DUALTREE : MAXPLTSIZE;
#else
int maxPltSize = CS::isDualITree(cs) ? MAXPLTSIZE_DUALTREE : MAXPLTSIZE;
#endif
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
CPelBuf orgBuf[3];
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
CompArea area = cu.blocks[comp];
if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
{
orgBuf[comp] = cs.getPredBuf(area);
}
else
{
orgBuf[comp] = cs.getOrgBuf(area);
}
}
TransformUnit &tu = *cs.getTU(partitioner.chType);
QpParam cQP(tu, compBegin);
int qp = cQP.Qp(true) - 12;
qp = (qp < 0) ? 0 : ((qp > 56) ? 56 : qp);
int errorLimit = g_paletteQuant[qp];
if (lossless)
{
errorLimit = 0;
}
uint32_t totalSize = height*width;
SortingElement *pelList = new SortingElement[totalSize];
SortingElement element;
SortingElement *pelListSort = new SortingElement[MAXPLTSIZE + 1];
uint32_t dictMaxSize = maxPltSize;
int last = -1;
uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
for (uint32_t y = 0; y < height; y++)
{
for (uint32_t x = 0; x < width; x++)
{
uint32_t org[3], pX, pY;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
pX = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
pY = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
org[comp] = orgBuf[comp].at(pX, pY);
}
element.setAll(org, compBegin, numComp);
ComponentID tmpCompBegin = compBegin;
int tmpNumComp = numComp;
if( cs.sps->getChromaFormatIdc() != CHROMA_444 &&
numComp == 3 &&
(x != ((x >> scaleX) << scaleX) || (y != ((y >> scaleY) << scaleY))) )
{
tmpCompBegin = COMPONENT_Y;
tmpNumComp = 1;
}
int besti = last, bestSAD = (last == -1) ? MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
if (lossless)
{
if (bestSAD)
{
for (int i = idx - 1; i >= 0; i--)
{
uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
if (sad == 0)
{
bestSAD = sad;
besti = i;
break;
}
}
}
}
else
{
for (int i = idx - 1; i >= 0; i--)
uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
if (sad < bestSAD)
{
bestSAD = sad;
besti = i;
if (!sad)
{
break;
}
}
if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless))
{
pelList[besti].addElement(element, tmpCompBegin, tmpNumComp);
last = besti;
}
else
{
pelList[idx].copyDataFrom(element, tmpCompBegin, tmpNumComp);
for (int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++)
pelList[idx].setCnt(1, comp);
5865
5866
5867
5868
5869
5870
5871
5872
5873
5874
5875
5876
5877
5878
5879
5880
5881
5882
5883
5884
5885
5886
5887
5888
5889
5890
5891
5892
5893
5894
5895
5896
5897
last = idx;
idx++;
}
}
}
if( cs.sps->getChromaFormatIdc() != CHROMA_444 && numComp == 3 )
{
for( int i = 0; i < idx; i++ )
{
pelList[i].setCnt( pelList[i].getCnt(COMPONENT_Y) + (pelList[i].getCnt(COMPONENT_Cb) >> 2), MAX_NUM_COMPONENT);
}
}
else
{
if( compBegin == 0 )
{
for( int i = 0; i < idx; i++ )
{
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Y), COMPONENT_Cb);
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Y), COMPONENT_Cr);
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Y), MAX_NUM_COMPONENT);
}
}
else
{
for( int i = 0; i < idx; i++ )
{
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Cb), COMPONENT_Y);
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Cb), MAX_NUM_COMPONENT);
}
}
}
for (int i = 0; i < dictMaxSize; i++)
pelListSort[i].setCnt(0, COMPONENT_Y);
pelListSort[i].setCnt(0, COMPONENT_Cb);
pelListSort[i].setCnt(0, COMPONENT_Cr);
pelListSort[i].setCnt(0, MAX_NUM_COMPONENT);
pelListSort[i].resetAll(compBegin, numComp);
}
//bubble sorting
dictMaxSize = 1;
for (int i = 0; i < idx; i++)
if( pelList[i].getCnt(MAX_NUM_COMPONENT) > pelListSort[dictMaxSize - 1].getCnt(MAX_NUM_COMPONENT) )
{
int j;
for (j = dictMaxSize; j > 0; j--)
if (pelList[i].getCnt(MAX_NUM_COMPONENT) > pelListSort[j - 1].getCnt(MAX_NUM_COMPONENT))
pelListSort[j].copyAllFrom(pelListSort[j - 1], compBegin, numComp);
dictMaxSize = std::min(dictMaxSize + 1, (uint32_t)maxPltSize);
}
else
{
break;
}
}
pelListSort[j].copyAllFrom(pelList[i], compBegin, numComp);
}
}
uint64_t numColorBits = 0;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
numColorBits += (comp > 0) ? channelBitDepth_C : channelBitDepth_L;
}
const int plt_lambda_shift = (compBegin > 0) ? pcmShiftRight_C : pcmShiftRight_L;
double bitCost = m_pcRdCost->getLambda() / (double) (1 << (2 * plt_lambda_shift)) * numColorBits;
bool reuseflag[MAXPLTPREDSIZE] = { false };
int run;
double reuseflagCost;
for (int i = 0; i < maxPltSize; i++)
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
5978
5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
{
if( pelListSort[i].getCnt(MAX_NUM_COMPONENT) )
{
ComponentID tmpCompBegin = compBegin;
int tmpNumComp = numComp;
if( cs.sps->getChromaFormatIdc() != CHROMA_444 && numComp == 3 && pelListSort[i].getCnt(COMPONENT_Cb) == 0 )
{
tmpCompBegin = COMPONENT_Y;
tmpNumComp = 1;
}
for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
{
int half = pelListSort[i].getCnt(comp) >> 1;
cu.curPLT[comp][paletteSize] = (pelListSort[i].getSumData(comp) + half) / pelListSort[i].getCnt(comp);
}
int best = -1;
if( errorLimit )
{
double pal[MAX_NUM_COMPONENT], err = 0.0, bestCost = 0.0;
for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
{
pal[comp] = pelListSort[i].getSumData(comp) / (double)pelListSort[i].getCnt(comp);
err = pal[comp] - cu.curPLT[comp][paletteSize];
if( isChroma((ComponentID) comp) )
{
bestCost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C)) * pelListSort[i].getCnt(comp);
}
else
{
bestCost += (err * err) / (1 << (2 * pcmShiftRight_L)) * pelListSort[i].getCnt(comp);
}
}
bestCost += bitCost;
for( int t = 0; t < cs.prevPLT.curPLTSize[compBegin]; t++ )
{
double cost = 0.0;
for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
{
err = pal[comp] - cs.prevPLT.curPLT[comp][t];
if( isChroma((ComponentID) comp) )
{
cost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C)) * pelListSort[i].getCnt(comp);
}
else
{
cost += (err * err) / (1 << (2 * pcmShiftRight_L)) * pelListSort[i].getCnt(comp);
}
}
run = 0;
for (int t2 = t; t2 >= 0; t2--)
{
if (!reuseflag[t2])
{
run++;
}