Newer
Older
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
{
CodingStructure& cs = *(pu.cs);
CompArea area = compID == COMPONENT_Cb ? pu.Cb() : pu.Cr();
PelBuf orgBuf = cs.getOrgBuf(area);
PelBuf predBuf = cs.getPredBuf(area);
int maxIdc = NUM_GLM_IDC - 1;
int mode = pu.intraDir[1];
DistParam distParamSad;
DistParam distParamSatd;
m_pcRdCost->setDistParam(distParamSad, orgBuf, predBuf, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), compID, false);
m_pcRdCost->setDistParam(distParamSatd, orgBuf, predBuf, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), compID, true);
distParamSad.applyWeight = false;
distParamSatd.applyWeight = false;
sadBest = -1;
// Search positive idcs
for ( int idc = 0; idc <= maxIdc; idc++ )
{
pu.glmIdc.setIdc(compID, 0, idc);
pu.glmIdc.setIdc(compID, 1, idc);
predIntraChromaLM( compID, predBuf, pu, area, mode );
int64_t sad = distParamSad.distFunc(distParamSad) * 2;
int64_t satd = distParamSatd.distFunc(distParamSatd);
int64_t sadThis = std::min(sad, satd);
if ( sadBest == -1 || sadThis < sadBest )
{
sadBest = sadThis;
idcBest = idc;
}
}
}
#endif
// Appends one (area, cost) pair to m_cuAreaInSCIPU / m_cuCostInSCIPU at index
// m_numCuInSCIPU. Once NUM_INTER_CU_INFO_SAVE entries are stored, further
// calls leave the tables untouched.
void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost )
{
  if( m_numCuInSCIPU >= NUM_INTER_CU_INFO_SAVE )
  {
    return; // tables are full
  }

  m_cuAreaInSCIPU[m_numCuInSCIPU] = area;
  m_cuCostInSCIPU[m_numCuInSCIPU] = cost;
  ++m_numCuInSCIPU;
}
// Clears the saved (area, cost) tables: every slot gets a default-constructed
// Area and a zero cost, and the entry counter is reset to 0.
void IntraSearch::initCuAreaCostInSCIPU()
{
  m_numCuInSCIPU = 0;

  for( int slot = 0; slot != NUM_INTER_CU_INFO_SAVE; ++slot )
  {
    m_cuCostInSCIPU[slot] = 0;
    m_cuAreaInSCIPU[slot] = Area();
  }
}
// Palette (PLT) search for the CU/TU that cs returns for partitioner.chType,
// over components [compBegin, compBegin + numComp).
// Visible steps: derive and reorder the palette, pre-compute per-pixel index
// costs, derive the index map for the horizontal scan and (when more than one
// index value is possible) for the vertical scan, drop palette entries that no
// pixel uses, write the reconstruction, and add the distortion to cs.dist.
// NOTE(review): this text appears to have been copied from a web blame view.
// Lines that hold only a number, an author name or the word "committed" are
// not C++, and several brace / condition / preprocessor lines appear to be
// missing (see the notes below). Restore the function from the upstream file
// before building; the comments here describe only what is visible.
void IntraSearch::PLTSearch(CodingStructure &cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
TransformUnit &tu = *cs.getTU(partitioner.chType);
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
// With LMCS active for this CTU, the prediction buffer is loaded with the
// original samples and its luma plane is passed through the forward LUT.
if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
{
cs.getPredBuf().copyFrom(cs.getOrgBuf());
cs.getPredBuf().Y().rspSignal(m_pcReshape->getFwdLUT());
}
// NOTE(review): this assignment is unconditional here; a guarding condition
// may have been lost with the other missing lines - confirm against upstream.
cs.prevPLT.curPLTSize[compBegin] = cs.prevPLT.curPLTSize[COMPONENT_Y];
cu.lastPLTSize[compBegin] = cs.prevPLT.curPLTSize[compBegin];
//derive palette
derivePLTLossy(cs, partitioner, compBegin, numComp);
reorderPLT(cs, partitioner, compBegin, numComp);
Yin Zhao
committed
// idxExist[i] is filled by deriveIndexMap: true when index i is used by the retained index map.
bool idxExist[MAXPLTSIZE + 1] = { false };
preCalcPLTIndexRD(cs, partitioner, compBegin, numComp); // Pre-calculate distortions for each pixel
double rdCost = MAX_DOUBLE;
deriveIndexMap(cs, partitioner, compBegin, numComp, PLT_SCAN_HORTRAV, rdCost, idxExist); // Optimize palette index map (horizontal scan)
// The vertical scan is only tried when more than one index value can occur.
if ((cu.curPLTSize[compBegin] + cu.useEscape[compBegin]) > 1)
{
deriveIndexMap(cs, partitioner, compBegin, numComp, PLT_SCAN_VERTRAV, rdCost, idxExist); // Optimize palette index map (vertical scan)
}
// Remove unused palette entries
uint8_t newPLTSize = 0;
// idxMapping[old index] -> new index; memset with -1 sets every byte to 0xFF, so each int starts as -1.
int idxMapping[MAXPLTSIZE + 1];
memset(idxMapping, -1, sizeof(int) * (MAXPLTSIZE + 1));
for (int i = 0; i < cu.curPLTSize[compBegin]; i++)
{
if (idxExist[i])
{
idxMapping[i] = newPLTSize;
newPLTSize++;
}
// NOTE(review): the '}' closing the for loop above appears to be missing here.
// The slot at the old palette size (compared against to select calcPixelPred
// below) maps to the new palette size when escape is in use, else to -1.
idxMapping[cu.curPLTSize[compBegin]] = cu.useEscape[compBegin]? newPLTSize: -1;
if (newPLTSize != cu.curPLTSize[compBegin]) // there exist unused palette entries
{ // update palette table and reuseflag
Pel curPLTtmp[MAX_NUM_COMPONENT][MAXPLTSIZE];
int reuseFlagIdx = 0, curPLTtmpIdx = 0, reuseEntrySize = 0;
memset(cu.reuseflag[compBegin], false, sizeof(bool) * MAXPLTPREDSIZE);
int compBeginTmp = compBegin;
int numCompTmp = numComp;
// NOTE(review): the '}' three lines below has no visible opener; an
// 'if (...) {' guarding these three statements appears to be missing.
memset(cu.reuseflag[COMPONENT_Y], false, sizeof(bool) * MAXPLTPREDSIZE);
compBeginTmp = COMPONENT_Y;
numCompTmp = (cu.chromaFormat != CHROMA_400) ? 3 : 1;
}
// Compact the palette: copy every entry that is still used into curPLTtmp.
for (int curIdx = 0; curIdx < cu.curPLTSize[compBegin]; curIdx++)
{
if (idxExist[curIdx])
{
for (int comp = compBeginTmp; comp < (compBeginTmp + numCompTmp); comp++)
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
curPLTtmp[comp][curPLTtmpIdx] = cu.curPLT[comp][curIdx];
// Update reuse flags
if (curIdx < cu.reusePLTSize[compBegin])
{
bool match = false;
// reuseFlagIdx is not reset between entries, so the search through
// cs.prevPLT resumes where the previous search stopped.
for (; reuseFlagIdx < cs.prevPLT.curPLTSize[compBegin]; reuseFlagIdx++)
{
bool matchTmp = true;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
matchTmp = matchTmp && (curPLTtmp[comp][curPLTtmpIdx] == cs.prevPLT.curPLT[comp][reuseFlagIdx]);
}
if (matchTmp)
{
match = true;
break;
}
}
if (match)
{
cu.reuseflag[compBegin][reuseFlagIdx] = true;
cu.reuseflag[COMPONENT_Y][reuseFlagIdx] = true;
reuseEntrySize++;
}
}
curPLTtmpIdx++;
}
}
cu.reusePLTSize[compBegin] = reuseEntrySize;
// update palette table
cu.curPLTSize[compBegin] = newPLTSize;
cu.curPLTSize[COMPONENT_Y] = newPLTSize;
for (int comp = compBeginTmp; comp < (compBeginTmp + numCompTmp); comp++)
memcpy( cu.curPLT[comp], curPLTtmp[comp], sizeof(Pel)*cu.curPLTSize[compBegin]);
// Number of distinct index values, counting escape as one extra value.
int indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin];
if (indexMaxSize <= 1)
{
cu.useRotation[compBegin] = false;
}
//reconstruct pixel
// NOTE(review): the loop braces and the 'if' that the 'else' below belongs
// to are not all visible in this region; the block structure cannot be read
// off the text as it stands.
PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin);
for (uint32_t y = 0; y < height; y++)
for (uint32_t x = 0; x < width; x++)
// Remap the stored index to the compacted palette.
curPLTIdx.at(x, y) = idxMapping[curPLTIdx.at(x, y)];
// An index equal to the palette size is handed to calcPixelPred.
if (curPLTIdx.at(x, y) == cu.curPLTSize[compBegin])
calcPixelPred(cs, partitioner, y, x, compBegin, numComp);
}
else
{
for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++)
{
CompArea area = cu.blocks[compID];
PelBuf recBuf = cs.getRecoBuf(area);
uint32_t scaleX = getComponentScaleX((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
// Same-resolution case: the palette colour is written at (x, y).
if (compBegin != COMPONENT_Y || compID == COMPONENT_Y)
{
recBuf.at(x, y) = cu.curPLT[compID][curPLTIdx.at(x, y)];
// Other components when compBegin is luma: written only where x and y are
// multiples of the chroma scale, at the down-shifted position.
else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0)
recBuf.at(x >> scaleX, y >> scaleY) = cu.curPLT[compID][curPLTIdx.at(x, y)];
}
}
}
}
}
// Prediction and residual buffers are zeroed; rate and cost are reset to their maximum values.
cs.getPredBuf().fill(0);
cs.getResiBuf().fill(0);
cs.getOrgResiBuf().fill(0);
cs.fracBits = MAX_UINT;
cs.cost = MAX_DOUBLE;
// Sum the distortion between the original and the reconstruction over the components.
Distortion distortion = 0;
for (uint32_t comp = compBegin; comp < (compBegin + numComp); comp++)
{
const ComponentID compID = ComponentID(comp);
CPelBuf reco = cs.getRecoBuf(compID);
CPelBuf org = cs.getOrgBuf(compID);
Yung-Hsuan Chao (Jessie)
committed
#if WCG_EXT
// Weighted SSE (DF_SSE_WTD, given the original luma) when luma-level delta-QP
// mapping is enabled or LMCS is active for this CTU.
if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
{
const CPelBuf orgLuma = cs.getOrgBuf(cs.area.blocks[COMPONENT_Y]);
if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
{
const CompArea &areaY = cu.Y();
CompArea tmpArea1(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
// NOTE(review): tmpRecLuma is read from m_tmpStorageLCU without being written
// in the visible code; a fill step may be among the missing lines - confirm.
PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
}
else
{
distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
}
}
else
Yung-Hsuan Chao (Jessie)
committed
#endif
// Plain SSE otherwise.
distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE);
Yung-Hsuan Chao (Jessie)
committed
// NOTE(review): the '}' closing the component loop is not visible before this line.
cs.dist += distortion;
const CompArea &area = cu.blocks[compBegin];
cs.setDecomp(area);
#if JVET_Z0118_GDR
cs.updateReconMotIPM(area);
#else
// Copy the reconstruction of this area into the picture buffer.
cs.picture->getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
// NOTE(review): no #endif for the JVET_Z0118_GDR #if is visible before the closing brace.
Yung-Hsuan Chao (Jessie)
committed
}
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
// Quantises and reconstructs one pixel for each component in
// [compBegin, compBegin + numComp).
//
// orgBuf       : input sample per component (indexed by component)
// paPixelValue : out, quantised value per component (clamped at 0 from below)
// paRecoValue  : out, de-quantised value per component, clipped to the
//                component's bit depth
//
// The quantiser parameters of a component are derived from QpParam(tu, comp)
// via Qp(true): remainder and period of the QP with respect to 6.
void IntraSearch::calcPixelPredRD(CodingStructure& cs, Partitioner& partitioner, Pel* orgBuf, Pel* paPixelValue, Pel* paRecoValue, ComponentID compBegin, uint32_t numComp)
{
  CodingUnit    &cu = *cs.getCU(partitioner.chType);
  TransformUnit &tu = *cs.getTU(partitioner.chType);

  for (uint32_t compIdx = compBegin; compIdx < (compBegin + numComp); compIdx++)
  {
    // Per-component quantiser set-up
    QpParam cQP(tu, ComponentID(compIdx));
    const int qpVal    = cQP.Qp(true);
    const int rem      = qpVal % 6;
    const int per      = qpVal / 6;
    const int fwdScale = g_quantScales[0][rem];
    const int fwdShift = QUANT_SHIFT + per;
    const int fwdRound = 1 << (fwdShift - 1);
    const int invShift = IQUANT_SHIFT;
    const int invRound = 1 << (invShift - 1);

    const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)compIdx));

    // Forward quantisation with rounding; negative results are clamped to 0
    paPixelValue[compIdx] = Pel(std::max<int>(0, ((orgBuf[compIdx] * fwdScale + fwdRound) >> fwdShift)));
    assert(paPixelValue[compIdx] < (1 << (channelBitDepth + 1)));

    // Inverse quantisation with rounding, then clip to the valid sample range
    paRecoValue[compIdx] = (((paPixelValue[compIdx] * g_invQuantScales[0][rem]) << per) + invRound) >> invShift;
    paRecoValue[compIdx] = Pel(ClipBD<int>(paRecoValue[compIdx], channelBitDepth));//to be checked
  }
}
// Pre-computes, for every pixel of the CU, the cost of representing it with
// each palette entry and with the escape index, and records the cheapest one.
//
// Outputs (rasPos = y * width + x):
//   m_indexError[idx][rasPos]   cost of index idx; idx == cu.curPLTSize[compBegin]
//                               is the escape index (distortion + lambda * rate)
//   m_minErrorIndexMap[rasPos]  index with the smallest cost
//
// compBegin / numComp select the components [compBegin, compBegin + numComp).
void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
{
  CodingUnit &cu = *cs.getCU(partitioner.chType);
  uint32_t height = cu.block(compBegin).height;
  uint32_t width = cu.block(compBegin).width;
  bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());

  // Source samples: the prediction buffer when LMCS is active for this CTU,
  // otherwise the original buffer.
  CPelBuf orgBuf[3];
  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
  {
    CompArea area = cu.blocks[comp];
    if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
    {
      orgBuf[comp] = cs.getPredBuf(area);
    }
    else
    {
      orgBuf[comp] = cs.getOrgBuf(area);
    }
  }

  int rasPos;
  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
  for (uint32_t y = 0; y < height; y++)
  {
    for (uint32_t x = 0; x < width; x++)
    {
      rasPos = y * width + x;
      // chroma discard
      bool discardChroma = (compBegin == COMPONENT_Y) && (y&scaleY || x&scaleX);

      // Fetch the current pixel; when compBegin is luma, the other components
      // are read at the down-shifted position.
      Pel curPel[3];
      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
      {
        uint32_t pX1 = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
        uint32_t pY1 = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
        curPel[comp] = orgBuf[comp].at(pX1, pY1);
      }

      uint8_t pltIdx = 0;
      double minError = MAX_DOUBLE;
      uint8_t bestIdx = 0;
      // Every palette entry starts at MAX_DOUBLE for this pixel.
      for (uint8_t z = 0; z < cu.curPLTSize[compBegin]; z++)
      {
        m_indexError[z][rasPos] = minError;
      }

      while (pltIdx < cu.curPLTSize[compBegin])
      {
        uint64_t sqrtError = 0;
        if (lossless)
        {
          // Lossless: only an exact match is recorded, and the search stops
          // at the first one.
          for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
          {
            sqrtError += int64_t(abs(curPel[comp] - cu.curPLT[comp][pltIdx]));
          }
          if (sqrtError == 0)
          {
            m_indexError[pltIdx][rasPos] = (double) sqrtError;
            minError = (double) sqrtError;
            bestIdx = pltIdx;
            break;
          }
        }
        else
        {
          // Lossy: squared error, with chroma scaled by ENC_CHROMA_WEIGHTING.
          for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
          {
            int64_t tmpErr = int64_t(curPel[comp] - cu.curPLT[comp][pltIdx]);
            if (isChroma((ComponentID) comp))
            {
              sqrtError += uint64_t(tmpErr * tmpErr * ENC_CHROMA_WEIGHTING);
            }
            else
            {
              sqrtError += tmpErr * tmpErr;
            }
          }
          m_indexError[pltIdx][rasPos] = (double) sqrtError;
          if (sqrtError < minError)
          {
            minError = (double) sqrtError;
            bestIdx = pltIdx;
          }
        }
        pltIdx++;
      }

      // Escape candidate: quantised/reconstructed values are only needed in
      // the lossy case; in the lossless case only the rate contributes.
      Pel paPixelValue[3], paRecoValue[3];
      if (!lossless)
      {
        calcPixelPredRD(cs, partitioner, curPel, paPixelValue, paRecoValue, compBegin, numComp);
      }
      uint64_t error = 0, rate = 0;
      for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
      {
        if (lossless)
        {
          rate += m_escapeNumBins[curPel[comp]];
        }
        else
        {
          int64_t tmpErr = int64_t(curPel[comp] - paRecoValue[comp]);
          if (isChroma((ComponentID) comp))
          {
            error += uint64_t(tmpErr * tmpErr * ENC_CHROMA_WEIGHTING);
          }
          else
          {
            error += tmpErr * tmpErr;
          }
          rate += m_escapeNumBins[paPixelValue[comp]]; // encode quantized escape color
        }
      }
      double rdCost = (double)error + m_pcRdCost->getLambda()*(double)rate;
      m_indexError[cu.curPLTSize[compBegin]][rasPos] = rdCost;
      if (rdCost < minError)
      {
        minError = rdCost;
        bestIdx = (uint8_t)cu.curPLTSize[compBegin];
      }
      m_minErrorIndexMap[rasPos] = bestIdx; // save the optimal index of the current pixel
    }
  }
}
// Derives the palette index map and run types of the current CU for one scan
// direction with a trellis search, and keeps the result if it is cheaper than
// dMinCost.
//
// cs, partitioner : give access to the CU/TU of partitioner.chType
// compBegin       : component that selects block size, palette and TU buffers
// numComp         : not used by this function
// pltScanMode     : scan direction (PLT_SCAN_HORTRAV or PLT_SCAN_VERTRAV)
// dMinCost        : in/out, best cost so far; lowered when this scan wins
// idxExist        : out, MAXPLTSIZE + 1 flags marking the indices used by the
//                   retained map; rewritten only when this scan wins
//
// Returns early, changing none of the outputs, when deriveSubblockIndexMap
// reports early termination.
void IntraSearch::deriveIndexMap(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, double& dMinCost, bool* idxExist)
{
  CodingUnit &cu = *cs.getCU(partitioner.chType);
  TransformUnit &tu = *cs.getTU(partitioner.chType);
  uint32_t height = cu.block(compBegin).height;
  uint32_t width = cu.block(compBegin).width;
  int total = height*width;
  Pel *runIndex = tu.getPLTIndex(compBegin);
  bool *runType = tu.getRunTypes(compBegin);
  m_scanOrder = g_scanOrder[SCAN_UNGROUPED][pltScanMode ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];

  // Trellis initialization
  // The byte counts are taken from the elements themselves so that each memset
  // covers exactly NUM_TRELLIS_STATE entries whatever the element type is.
  for (int i = 0; i < 2; i++)
  {
    memset(m_prevRunTypeRDOQ[i], 0, sizeof(m_prevRunTypeRDOQ[i][0])*NUM_TRELLIS_STATE);
    memset(m_prevRunPosRDOQ[i], 0, sizeof(m_prevRunPosRDOQ[i][0])*NUM_TRELLIS_STATE);
    memset(m_stateCostRDOQ[i], 0, sizeof(m_stateCostRDOQ[i][0])*NUM_TRELLIS_STATE);
  }
  for (int state = 0; state < NUM_TRELLIS_STATE; state++)
  {
    m_statePtRDOQ[state][0] = 0;
  }

  // Context modeling
  const FracBitsAccess& fracBits = m_CABACEstimator->getCtx().getFracBitsAcess();
  BinFracBits fracBitsPltCopyFlagIndex[RUN_IDX_THRE + 1];
  for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
  {
    const unsigned ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_INDEX, dist);
    fracBitsPltCopyFlagIndex[dist] = fracBits.getFracBitsArray(Ctx::IdxRunModel( ctxId ) );
  }
  BinFracBits fracBitsPltCopyFlagAbove[RUN_IDX_THRE + 1];
  for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
  {
    const unsigned ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_COPY, dist);
    fracBitsPltCopyFlagAbove[dist] = fracBits.getFracBitsArray(Ctx::CopyRunModel( ctxId ) );
  }
  const BinFracBits fracBitsPltRunType = fracBits.getFracBitsArray( Ctx::RunTypeFlag() );

  // Trellis RDO per CG
  bool contTrellisRD = true;
  for (int subSetId = 0; ( subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE ) && contTrellisRD; subSetId++)
  {
    int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE;
    int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE);
    maxSubPos = (maxSubPos > total) ? total : maxSubPos; // if last position is out of the current CU size
    contTrellisRD = deriveSubblockIndexMap(cs, partitioner, compBegin, pltScanMode, minSubPos, maxSubPos, fracBitsPltRunType, fracBitsPltCopyFlagIndex, fracBitsPltCopyFlagAbove, dMinCost, (bool)pltScanMode);
  }
  if (!contTrellisRD)
  {
    return;
  }

  // best state at the last scan position
  double sumRdCost = MAX_DOUBLE;
  uint8_t bestState = 0;
  for (uint8_t state = 0; state < NUM_TRELLIS_STATE; state++)
  {
    if (m_stateCostRDOQ[0][state] < sumRdCost)
    {
      sumRdCost = m_stateCostRDOQ[0][state];
      bestState = state;
    }
  }

  bool checkRunTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
  uint8_t checkIndexTable[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
  uint8_t bestStateTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
  uint8_t nextState = bestState;

  // best trellis path: walk the state pointers backwards from the last position
  for (int i = (width*height - 1); i >= 0; i--)
  {
    bestStateTable[i] = nextState;
    int rasterPos = m_scanOrder[i].idx;
    nextState = m_statePtRDOQ[nextState][rasterPos];
  }

  // reconstruct index and runs based on the state pointers
  for (int i = 0; i < (width*height); i++)
  {
    int rasterPos = m_scanOrder[i].idx;
    int abovePos = (pltScanMode == PLT_SCAN_HORTRAV) ? m_scanOrder[i].idx - width : m_scanOrder[i].idx - 1;
    nextState = bestStateTable[i];
    if ( nextState == 0 ) // same as the previous
    {
      checkRunTable[rasterPos] = checkRunTable[ m_scanOrder[i - 1].idx ];
      if ( checkRunTable[rasterPos] == PLT_RUN_INDEX )
      {
        checkIndexTable[rasterPos] = checkIndexTable[m_scanOrder[i - 1].idx];
      }
      else
      {
        checkIndexTable[rasterPos] = checkIndexTable[ abovePos ];
      }
    }
    else if (nextState == 1) // CopyAbove mode
    {
      checkRunTable[rasterPos] = PLT_RUN_COPY;
      checkIndexTable[rasterPos] = checkIndexTable[abovePos];
    }
    else if (nextState == 2) // Index mode
    {
      checkRunTable[rasterPos] = PLT_RUN_INDEX;
      checkIndexTable[rasterPos] = m_minErrorIndexMap[rasterPos];
    }
  }

  // Escape flag: set when any position uses the index equal to the palette size
  m_bestEscape = false;
  for (int pos = 0; pos < (width*height); pos++)
  {
    uint8_t index = checkIndexTable[pos];
    if (index == cu.curPLTSize[compBegin])
    {
      m_bestEscape = true;
      break;
    }
  }

  // Horizontal scan v.s vertical scan
  if (sumRdCost < dMinCost)
  {
    cu.useEscape[compBegin] = m_bestEscape;
    m_bestScanRotationMode = pltScanMode;
    memset(idxExist, false, sizeof(bool) * (MAXPLTSIZE + 1));
    for (int pos = 0; pos < (width*height); pos++)
    {
      runIndex[pos] = checkIndexTable[pos];
      runType[pos] = checkRunTable[pos];
      idxExist[checkIndexTable[pos]] = true;
    }
    dMinCost = sumRdCost;
  }
}
// Runs the trellis search over the scan positions [minSubPos, maxSubPos).
//
// For every position and every current state (0: same as previous sample,
// 1: copy-above, 2: index mode) the cheapest predecessor state is selected.
// The two-slot *RDOQ members are used as ping-pong buffers: slot refId holds
// the previous position, slot 1 - refId receives the current one.
//
// fracBitsPltRunType    : bit costs of the run-type flag
// fracBitsPltIndexINDEX : bit-cost tables used when the previously coded run
//                         type of a state is PLT_RUN_INDEX
// fracBitsPltIndexCOPY  : bit-cost tables used otherwise
// minCost               : cost to beat for early termination
// useRotate             : enables the early-termination test
//
// Returns false when useRotate is set and the costs of all three states have
// reached minCost at some position; returns true otherwise.
bool IntraSearch::deriveSubblockIndexMap(
  CodingStructure& cs,
  Partitioner& partitioner,
  ComponentID compBegin,
  PLTScanMode pltScanMode,
  int minSubPos,
  int maxSubPos,
  const BinFracBits& fracBitsPltRunType,
  const BinFracBits* fracBitsPltIndexINDEX,
  const BinFracBits* fracBitsPltIndexCOPY,
  const double minCost,
  bool useRotate
)
{
  CodingUnit &cu = *cs.getCU(partitioner.chType);
  uint32_t height = cu.block(compBegin).height;
  uint32_t width = cu.block(compBegin).width;
  int indexMaxValue = cu.curPLTSize[compBegin];
  int refId = 0;
  int currRasterPos, currScanPos, prevScanPos, aboveScanPos, roffset;
  int log2Width = (pltScanMode == PLT_SCAN_HORTRAV) ? floorLog2(width): floorLog2(height);
  // The per-state index/run histories are addressed modulo two scan lines.
  int buffersize = (pltScanMode == PLT_SCAN_HORTRAV) ? 2*width: 2*height;

  for (int curPos = minSubPos; curPos < maxSubPos; curPos++)
  {
    currRasterPos = m_scanOrder[curPos].idx;
    prevScanPos = (curPos == 0) ? 0 : (curPos - 1) % buffersize;
    roffset = (curPos >> log2Width) << log2Width;
    aboveScanPos = roffset - (curPos - roffset + 1);
    aboveScanPos %= buffersize;
    currScanPos = curPos % buffersize;
    if ((pltScanMode == PLT_SCAN_HORTRAV && curPos < width) || (pltScanMode == PLT_SCAN_VERTRAV && curPos < height))
    {
      aboveScanPos = -1; // first column/row: above row is not valid
    }

    // 1st state: same as previous scanned sample
    // 2nd state: Copy_Above mode
    // 3rd state: Index mode
    // Loop of current state
    for ( int curState = 0; curState < NUM_TRELLIS_STATE; curState++ )
    {
      double minRdCost = MAX_DOUBLE;
      int minState = 0; // best prevState
      uint8_t bestRunIndex = 0;
      bool bestRunType = 0;
      bool bestPrevCodedType = 0;
      int bestPrevCodedPos = 0;
      if ( ( curState == 0 && curPos == 0 ) || ( curState == 1 && aboveScanPos < 0 ) ) // state not available
      {
        m_stateCostRDOQ[1 - refId][curState] = MAX_DOUBLE;
        continue;
      }

      bool runType = 0;
      uint8_t runIndex = 0;
      if ( curState == 1 ) // 2nd state: Copy_Above mode
      {
        runType = PLT_RUN_COPY;
      }
      else if ( curState == 2 ) // 3rd state: Index mode
      {
        runType = PLT_RUN_INDEX;
        runIndex = m_minErrorIndexMap[currRasterPos];
      }

      // Loop of previous state
      for ( int stateID = 0; stateID < NUM_TRELLIS_STATE; stateID++ )
      {
        if ( m_stateCostRDOQ[refId][stateID] == MAX_DOUBLE )
        {
          continue;
        }
        if ( curState == 0 ) // 1st state: same as previous scanned sample
        {
          runType = m_runMapRDOQ[refId][stateID][prevScanPos];
          runIndex = ( runType == PLT_RUN_INDEX ) ? m_indexMapRDOQ[refId][stateID][ prevScanPos ] : m_indexMapRDOQ[refId][stateID][ aboveScanPos ];
        }
        else if ( curState == 1 ) // 2nd state: Copy_Above mode
        {
          runIndex = m_indexMapRDOQ[refId][stateID][aboveScanPos];
        }
        bool prevRunType = m_runMapRDOQ[refId][stateID][prevScanPos];
        uint8_t prevRunIndex = m_indexMapRDOQ[refId][stateID][prevScanPos];
        uint8_t aboveRunIndex = (aboveScanPos >= 0) ? m_indexMapRDOQ[refId][stateID][aboveScanPos] : 0;
        int dist = curPos - m_prevRunPosRDOQ[refId][stateID] - 1;
        double rdCost = m_stateCostRDOQ[refId][stateID];
        // The predecessor alone already costs at least the best candidate found so far.
        if ( rdCost >= minRdCost ) continue;

        // rateDistOptPLT may update these two locals; they are committed only
        // if this predecessor turns out to be the best one.
        bool prevCodedRunType = m_prevRunTypeRDOQ[refId][stateID];
        int prevCodedPos = m_prevRunPosRDOQ [refId][stateID];
        const BinFracBits* fracBitsPt = (m_prevRunTypeRDOQ[refId][stateID] == PLT_RUN_INDEX) ? fracBitsPltIndexINDEX : fracBitsPltIndexCOPY;
        rdCost += rateDistOptPLT(runType, runIndex, prevRunType, prevRunIndex, aboveRunIndex, prevCodedRunType, prevCodedPos, curPos, (pltScanMode == PLT_SCAN_HORTRAV) ? width : height, dist, indexMaxValue, fracBitsPt, fracBitsPltRunType);
        if (rdCost < minRdCost) // update minState ( minRdCost )
        {
          minRdCost = rdCost;
          minState = stateID;
          bestRunType = runType;
          bestRunIndex = runIndex;
          bestPrevCodedType = prevCodedRunType;
          bestPrevCodedPos = prevCodedPos;
        }
      }

      // Update trellis info of current state
      m_stateCostRDOQ [1 - refId][curState] = minRdCost;
      m_prevRunTypeRDOQ[1 - refId][curState] = bestPrevCodedType;
      m_prevRunPosRDOQ [1 - refId][curState] = bestPrevCodedPos;
      m_statePtRDOQ[curState][currRasterPos] = minState;
      // Inherit the history of the chosen predecessor, then append the current decision.
      int buffer2update = std::min(buffersize, curPos);
      memcpy(m_indexMapRDOQ[1 - refId][curState], m_indexMapRDOQ[refId][minState], sizeof(uint8_t)*buffer2update);
      memcpy(m_runMapRDOQ[1 - refId][curState], m_runMapRDOQ[refId][minState], sizeof(bool)*buffer2update);
      m_indexMapRDOQ[1 - refId][curState][currScanPos] = bestRunIndex;
      m_runMapRDOQ [1 - refId][curState][currScanPos] = bestRunType;
    }

    if (useRotate) // early terminate: Rd cost >= min cost in horizontal scan
    {
      if ((m_stateCostRDOQ[1 - refId][0] >= minCost) &&
          (m_stateCostRDOQ[1 - refId][1] >= minCost) &&
          (m_stateCostRDOQ[1 - refId][2] >= minCost) )
      {
        return false;
      }
    }
    refId = 1 - refId;
  }
  return true;
}
// Cost of coding one scan position with the given run type / index.
//
// runType, runIndex       : decision evaluated for scanPos
// prevRunType/prevRunIndex: decision at the previous scan position
// aboveRunIndex           : index of the above position
// prevCodedRunType,
// prevCodedPos            : in/out, overwritten with runType / scanPos when
//                           the current sample is not identical to the
//                           previous one, or when scanPos == 0
// width                   : scan positions at or beyond it may carry a
//                           run-type flag
// dist                    : selects the entry of IndexfracBits (clamped to
//                           RUN_IDX_THRE)
// indexMaxValue           : alphabet size for the truncated-binary index code
//
// Returns the summed cost (index bits, distortion, copy flag, run-type flag),
// or MAX_DOUBLE when an index would have to be coded that equals its
// reference index.
double IntraSearch::rateDistOptPLT(
  bool runType,
  uint8_t runIndex,
  bool prevRunType,
  uint8_t prevRunIndex,
  uint8_t aboveRunIndex,
  bool& prevCodedRunType,
  int& prevCodedPos,
  int scanPos,
  uint32_t width,
  int dist,
  int indexMaxValue,
  const BinFracBits* IndexfracBits,
  const BinFracBits& TypefracBits)
{
  double rdCost = 0.0;
  // Identical to the previous sample: same run type and, in index mode, same index.
  bool identityFlag = !( (runType != prevRunType) || ( (runType == PLT_RUN_INDEX) && (runIndex != prevRunIndex) ) );

  if ( ( !identityFlag && runType == PLT_RUN_INDEX ) || scanPos == 0 ) // encode index value
  {
    uint8_t refIndex = (prevRunType == PLT_RUN_INDEX) ? prevRunIndex : aboveRunIndex;
    refIndex = (scanPos == 0) ? ( indexMaxValue + 1) : refIndex;
    if ( runIndex == refIndex )
    {
      rdCost = MAX_DOUBLE;
      return rdCost;
    }
    rdCost += m_pcRdCost->getLambda()*(m_truncBinBits[(runIndex > refIndex) ? runIndex - 1 : runIndex][(scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue] << SCALE_BITS);
  }

  // Distortion of the chosen index at this position
  rdCost += m_indexError[runIndex][m_scanOrder[scanPos].idx] * (1 << SCALE_BITS);

  // Copy flag (bin 1: identical to previous, bin 0: not identical)
  if (scanPos > 0)
  {
    rdCost += m_pcRdCost->getLambda()*( identityFlag ? (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[1]) : (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[0] ) );
  }

  // Run-type flag
  if ( !identityFlag && scanPos >= width && prevRunType != PLT_RUN_COPY )
  {
    rdCost += m_pcRdCost->getLambda()*TypefracBits.intBits[runType];
  }

  if (!identityFlag || scanPos == 0)
  {
    prevCodedRunType = runType;
    prevCodedPos = scanPos;
  }
  return rdCost;
}
// Number of bins produced for `symbol` by the escape binarisation that starts
// with `count` suffix bits: one prefix bin per escalation step (each step
// removes 2^count from the symbol and grows count by one), one terminating
// bin, and `count` suffix bins.
//
// The shifts are done on an unsigned 1 and only while count < 32, so the
// function has no undefined shift for large inputs; such inputs still trip
// the assertion below because they need more than 32 bins.
uint32_t IntraSearch::getEpExGolombNumBins(uint32_t symbol, uint32_t count)
{
  uint32_t numBins = 0;
  while (count < 32 && symbol >= (uint32_t(1) << count))
  {
    numBins++;
    symbol -= uint32_t(1) << count;
    count++;
  }
  numBins++;        // terminating bin of the prefix
  numBins += count; // suffix bins
  assert(numBins <= 32);
  return numBins;
}
// Length in bits of the truncated-binary codeword of `symbol` in an alphabet
// of `maxSymbol` values (symbol < maxSymbol is asserted).
//
// With thresh the largest value satisfying (1 << thresh) <= maxSymbol, the
// first (2^thresh - b) symbols take thresh bits and the remaining ones take
// thresh + 1 bits, where b = maxSymbol - 2^thresh.
uint32_t IntraSearch::getTruncBinBits(uint32_t symbol, uint32_t maxSymbol)
{
  uint32_t thresh;
  if (maxSymbol <= 256)
  {
    // small alphabets: thresh comes from the g_tbMax table
    thresh = g_tbMax[maxSymbol];
  }
  else
  {
    // larger alphabets: count up from 2^8 until the power of two exceeds maxSymbol
    thresh = 8;
    for (uint32_t threshVal = 1 << 8; threshVal <= maxSymbol; threshVal <<= 1)
    {
      thresh++;
    }
    thresh--;
  }

  const uint32_t uiVal = 1 << thresh;
  assert(uiVal <= maxSymbol);
  assert((uiVal << 1) > maxSymbol);
  assert(symbol < maxSymbol);

  const uint32_t b = maxSymbol - uiVal;
  assert(b < uiVal);

  return (symbol < uiVal - b) ? thresh : thresh + 1;
}
// Fills the two bit-count lookup tables used by the palette search:
//   m_truncBinBits[j][i] = getTruncBinBits(j, i) for 0 <= j < i <= m_symbolSize
//                          (all other entries of the cleared rows stay 0)
//   m_escapeNumBins[i]   = getEpExGolombNumBins(i, 5) for 0 <= i < m_symbolSize
//
// bitDepth is not used in this function.
void IntraSearch::initTBCTable(int bitDepth)
{
  for (uint32_t i = 0; i < m_symbolSize; i++)
  {
    memset(m_truncBinBits[i], 0, sizeof(uint16_t)*(m_symbolSize + 1));
  }
  for (uint32_t i = 0; i < (m_symbolSize + 1); i++)
  {
    for (uint32_t j = 0; j < i; j++)
    {
      m_truncBinBits[j][i] = getTruncBinBits(j, i);
    }
  }
  memset(m_escapeNumBins, 0, sizeof(uint16_t)*m_symbolSize);
  for (uint32_t i = 0; i < m_symbolSize; i++)
  {
    m_escapeNumBins[i] = getEpExGolombNumBins(i, 5);
  }
}
// Codes the pixel at (xPos, yPos) as an escape sample for the components
// [compBegin, compBegin + numComp): stores the escape value in the TU and
// writes the corresponding reconstruction.
//
// Lossless: the source sample is stored unchanged.
// Lossy   : the source sample is quantised (clamped at 0 from below), the
//           quantised value is stored, and the reconstruction is the
//           de-quantised value clipped to the component's bit depth.
//
// When compBegin is luma, the other components are handled only at positions
// that are multiples of the chroma scale, at the down-shifted coordinates.
void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp)
{
  CodingUnit &cu = *cs.getCU(partitioner.chType);
  TransformUnit &tu = *cs.getTU(partitioner.chType);
  bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());

  // Source samples: the prediction buffer when LMCS is active for this CTU,
  // otherwise the original buffer.
  CPelBuf orgBuf[3];
  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
  {
    CompArea area = cu.blocks[comp];
    if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
    {
      orgBuf[comp] = cs.getPredBuf(area);
    }
    else
    {
      orgBuf[comp] = cs.getOrgBuf(area);
    }
  }

  // Quantiser parameters per component; only filled (and only read) in the lossy case.
  int qp[3];
  int qpRem[3];
  int qpPer[3];
  int quantiserScale[3];
  int quantiserRightShift[3];
  int rightShiftOffset[3];
  int invquantiserRightShift[3];
  int add[3];
  if (!lossless)
  {
    for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
    {
      QpParam cQP(tu, ComponentID(ch));
      qp[ch] = cQP.Qp(true);
      qpRem[ch] = qp[ch] % 6;
      qpPer[ch] = qp[ch] / 6;
      quantiserScale[ch] = g_quantScales[0][qpRem[ch]];
      quantiserRightShift[ch] = QUANT_SHIFT + qpPer[ch];
      rightShiftOffset[ch] = 1 << (quantiserRightShift[ch] - 1);
      invquantiserRightShift[ch] = IQUANT_SHIFT;
      add[ch] = 1 << (invquantiserRightShift[ch] - 1);
    }
  }

  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
  for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
  {
    const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)ch));
    CompArea area = cu.blocks[ch];
    PelBuf recBuf = cs.getRecoBuf(area);
    PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)ch);
    if (compBegin != COMPONENT_Y || ch == 0)
    {
      // Component addressed at the position itself
      if (lossless)
      {
        escapeValue.at(xPos, yPos) = orgBuf[ch].at(xPos, yPos);
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
        recBuf.at(xPos, yPos) = orgBuf[ch].at(xPos, yPos);
#else
        recBuf.at(xPos, yPos) = escapeValue.at(xPos, yPos);
#endif
      }
      else
      {
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
        escapeValue.at(xPos, yPos) = std::max<TCoeff>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]));
        assert(escapeValue.at(xPos, yPos) < (TCoeff(1) << (channelBitDepth + 1)));
        TCoeff value = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
        recBuf.at(xPos, yPos) = Pel(ClipBD<TCoeff>(value, channelBitDepth));//to be checked
#else
        escapeValue.at(xPos, yPos) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
        assert(escapeValue.at(xPos, yPos) < (1 << (channelBitDepth + 1)));
        recBuf.at(xPos, yPos) = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
        recBuf.at(xPos, yPos) = Pel(ClipBD<int>(recBuf.at(xPos, yPos), channelBitDepth));//to be checked
#endif
      }
    }
    else if (compBegin == COMPONENT_Y && ch > 0 && yPos % (1 << scaleY) == 0 && xPos % (1 << scaleX) == 0)
    {
      // Non-luma component while compBegin is luma: down-shifted coordinates
      uint32_t yPosC = yPos >> scaleY;
      uint32_t xPosC = xPos >> scaleX;
      if (lossless)
      {
        escapeValue.at(xPosC, yPosC) = orgBuf[ch].at(xPosC, yPosC);
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
        recBuf.at(xPosC, yPosC) = orgBuf[ch].at(xPosC, yPosC);
#else
        recBuf.at(xPosC, yPosC) = escapeValue.at(xPosC, yPosC);
#endif
      }
      else
      {
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
        escapeValue.at(xPosC, yPosC) = std::max<TCoeff>(
          0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]));
        assert(escapeValue.at(xPosC, yPosC) < (TCoeff(1) << (channelBitDepth + 1)));
        TCoeff value = (((escapeValue.at(xPosC, yPosC) * g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch])
                       >> invquantiserRightShift[ch];
        recBuf.at(xPosC, yPosC) = Pel(ClipBD<TCoeff>(value, channelBitDepth)); // to be checked
#else
        escapeValue.at(xPosC, yPosC) = TCoeff(std::max<int>(
          0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
        assert(escapeValue.at(xPosC, yPosC) < (1 << (channelBitDepth + 1)));
        recBuf.at(xPosC, yPosC) =
          (((escapeValue.at(xPosC, yPosC) * g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch])
          >> invquantiserRightShift[ch];
        recBuf.at(xPosC, yPosC) = Pel(ClipBD<int>(recBuf.at(xPosC, yPosC), channelBitDepth)); // to be checked
#endif
      }
    }
  }
}
void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
const int channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA);
const int channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA);
bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());
int pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH);
int pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH);
if (lossless)
{
pcmShiftRight_L = 0;
pcmShiftRight_C = 0;
}
int maxPltSize = cu.isSepTree() ? MAXPLTSIZE_DUALTREE : MAXPLTSIZE;
#else
int maxPltSize = CS::isDualITree(cs) ? MAXPLTSIZE_DUALTREE : MAXPLTSIZE;
#endif
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
CPelBuf orgBuf[3];
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
CompArea area = cu.blocks[comp];
if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
{
orgBuf[comp] = cs.getPredBuf(area);
}
else
{
orgBuf[comp] = cs.getOrgBuf(area);
}
}
TransformUnit &tu = *cs.getTU(partitioner.chType);
QpParam cQP(tu, compBegin);
int qp = cQP.Qp(true) - 12;
qp = (qp < 0) ? 0 : ((qp > 56) ? 56 : qp);
int errorLimit = g_paletteQuant[qp];
if (lossless)
{
errorLimit = 0;
}
uint32_t totalSize = height*width;
SortingElement *pelList = new SortingElement[totalSize];
SortingElement element;
SortingElement *pelListSort = new SortingElement[MAXPLTSIZE + 1];
uint32_t dictMaxSize = maxPltSize;
int last = -1;
uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
for (uint32_t y = 0; y < height; y++)
{
for (uint32_t x = 0; x < width; x++)
{
uint32_t org[3], pX, pY;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
pX = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
pY = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
org[comp] = orgBuf[comp].at(pX, pY);
}
element.setAll(org, compBegin, numComp);
ComponentID tmpCompBegin = compBegin;
int tmpNumComp = numComp;
if( cs.sps->getChromaFormatIdc() != CHROMA_444 &&
numComp == 3 &&
(x != ((x >> scaleX) << scaleX) || (y != ((y >> scaleY) << scaleY))) )
{
tmpCompBegin = COMPONENT_Y;
tmpNumComp = 1;
}
int besti = last, bestSAD = (last == -1) ? MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
if (lossless)
{
if (bestSAD)
{
for (int i = idx - 1; i >= 0; i--)
{
uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
if (sad == 0)
{