Newer
Older
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
Pel *runIndex = tu.getPLTIndex(compBegin);
bool *runType = tu.getRunTypes(compBegin);
m_scanOrder = g_scanOrder[SCAN_UNGROUPED][pltScanMode ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];
// Trellis initialization
for (int i = 0; i < 2; i++)
{
memset(m_prevRunTypeRDOQ[i], 0, sizeof(Pel)*NUM_TRELLIS_STATE);
memset(m_prevRunPosRDOQ[i], 0, sizeof(int)*NUM_TRELLIS_STATE);
memset(m_stateCostRDOQ[i], 0, sizeof (double)*NUM_TRELLIS_STATE);
}
for (int state = 0; state < NUM_TRELLIS_STATE; state++)
{
m_statePtRDOQ[state][0] = 0;
}
// Context modeling
const FracBitsAccess& fracBits = m_CABACEstimator->getCtx().getFracBitsAcess();
BinFracBits fracBitsPltCopyFlagIndex[RUN_IDX_THRE + 1];
for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
{
const unsigned ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_INDEX, dist);
fracBitsPltCopyFlagIndex[dist] = fracBits.getFracBitsArray(Ctx::IdxRunModel( ctxId ) );
}
BinFracBits fracBitsPltCopyFlagAbove[RUN_IDX_THRE + 1];
for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
{
const unsigned ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_COPY, dist);
fracBitsPltCopyFlagAbove[dist] = fracBits.getFracBitsArray(Ctx::CopyRunModel( ctxId ) );
}
const BinFracBits fracBitsPltRunType = fracBits.getFracBitsArray( Ctx::RunTypeFlag() );
// Trellis RDO per CG
bool contTrellisRD = true;
for (int subSetId = 0; ( subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE ) && contTrellisRD; subSetId++)
{
int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE;
int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE);
maxSubPos = (maxSubPos > total) ? total : maxSubPos; // if last position is out of the current CU size
contTrellisRD = deriveSubblockIndexMap(cs, partitioner, compBegin, pltScanMode, minSubPos, maxSubPos, fracBitsPltRunType, fracBitsPltCopyFlagIndex, fracBitsPltCopyFlagAbove, dMinCost, (bool)pltScanMode);
}
if (!contTrellisRD)
{
return;
}
// best state at the last scan position
double sumRdCost = MAX_DOUBLE;
uint8_t bestState = 0;
for (uint8_t state = 0; state < NUM_TRELLIS_STATE; state++)
{
if (m_stateCostRDOQ[0][state] < sumRdCost)
{
sumRdCost = m_stateCostRDOQ[0][state];
bestState = state;
}
}
bool checkRunTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
uint8_t checkIndexTable[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
uint8_t bestStateTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
uint8_t nextState = bestState;
// best trellis path
for (int i = (width*height - 1); i >= 0; i--)
{
bestStateTable[i] = nextState;
int rasterPos = m_scanOrder[i].idx;
nextState = m_statePtRDOQ[nextState][rasterPos];
}
// reconstruct index and runs based on the state pointers
for (int i = 0; i < (width*height); i++)
{
int rasterPos = m_scanOrder[i].idx;
int abovePos = (pltScanMode == PLT_SCAN_HORTRAV) ? m_scanOrder[i].idx - width : m_scanOrder[i].idx - 1;
nextState = bestStateTable[i];
if ( nextState == 0 ) // same as the previous
{
checkRunTable[rasterPos] = checkRunTable[ m_scanOrder[i - 1].idx ];
if ( checkRunTable[rasterPos] == PLT_RUN_INDEX )
{
checkIndexTable[rasterPos] = checkIndexTable[m_scanOrder[i - 1].idx];
}
else
{
checkIndexTable[rasterPos] = checkIndexTable[ abovePos ];
}
}
else if (nextState == 1) // CopyAbove mode
{
checkRunTable[rasterPos] = PLT_RUN_COPY;
checkIndexTable[rasterPos] = checkIndexTable[abovePos];
}
else if (nextState == 2) // Index mode
{
checkRunTable[rasterPos] = PLT_RUN_INDEX;
checkIndexTable[rasterPos] = m_minErrorIndexMap[rasterPos];
}
}
// Escape flag
m_bestEscape = false;
for (int pos = 0; pos < (width*height); pos++)
{
uint8_t index = checkIndexTable[pos];
if (index == cu.curPLTSize[compBegin])
{
m_bestEscape = true;
break;
}
}
// Horizontal scan v.s vertical scan
if (sumRdCost < dMinCost)
{
cu.useEscape[compBegin] = m_bestEscape;
m_bestScanRotationMode = pltScanMode;
memset(idxExist, false, sizeof(bool) * (MAXPLTSIZE + 1));
for (int pos = 0; pos < (width*height); pos++)
{
runIndex[pos] = checkIndexTable[pos];
runType[pos] = checkRunTable[pos];
idxExist[checkIndexTable[pos]] = true;
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
}
dMinCost = sumRdCost;
}
}
// Advances the palette run-type trellis over the scan positions
// [minSubPos, maxSubPos) of the current CU.
//
// Three states are tracked per scan position:
//   0: same as the previously scanned sample
//   1: Copy_Above mode
//   2: Index mode
// For every position and every current state, the cheapest predecessor state
// is selected using rateDistOptPLT(). Accumulated costs, the last coded run
// type / position and the per-state index and run-type histories live in the
// m_*RDOQ members, double-buffered through refId. The selected predecessor is
// recorded in m_statePtRDOQ[state][rasterPos].
//
// Parameters:
//   cs, partitioner        - used to fetch the current coding unit
//   compBegin              - component whose block size / palette size is used
//   pltScanMode            - horizontal or vertical traverse scan
//   minSubPos, maxSubPos   - half-open range of scan positions to process
//   fracBitsPltRunType     - bit estimates forwarded to rateDistOptPLT()
//   fracBitsPltIndexINDEX,
//   fracBitsPltIndexCOPY   - bit-estimate tables; the one matching the last
//                            coded run type of the predecessor is forwarded
//   minCost                - cost to beat, only used when useRotate is set
//   useRotate              - enables the early termination against minCost
//
// Returns false when useRotate is set and, after some position, all three
// state costs are >= minCost; returns true otherwise.
//
// NOTE(review): refId restarts at 0 on every call, so consecutive calls only
// chain correctly when each call processes an even number of positions -
// confirm that callers guarantee this.
bool IntraSearch::deriveSubblockIndexMap(
  CodingStructure& cs,
  Partitioner& partitioner,
  ComponentID compBegin,
  PLTScanMode pltScanMode,
  int minSubPos,
  int maxSubPos,
  const BinFracBits& fracBitsPltRunType,
  const BinFracBits* fracBitsPltIndexINDEX,
  const BinFracBits* fracBitsPltIndexCOPY,
  const double minCost,
  bool useRotate
)
{
  CodingUnit &cu = *cs.getCU(partitioner.chType);
  uint32_t height = cu.block(compBegin).height;
  uint32_t width = cu.block(compBegin).width;
  int indexMaxValue = cu.curPLTSize[compBegin];
  int refId = 0; // buffer that holds the trellis data of the previous position

  // Length of one traverse line (as log2) and size of the history window,
  // which spans two such lines.
  int log2Width = (pltScanMode == PLT_SCAN_HORTRAV) ? floorLog2(width) : floorLog2(height);
  int buffersize = (pltScanMode == PLT_SCAN_HORTRAV) ? 2 * width : 2 * height;

  for (int curPos = minSubPos; curPos < maxSubPos; curPos++)
  {
    const int currRasterPos = m_scanOrder[curPos].idx;
    const int prevScanPos = (curPos == 0) ? 0 : (curPos - 1) % buffersize;
    const int roffset = (curPos >> log2Width) << log2Width; // first scan position of the current line
    const int currScanPos = curPos % buffersize;

    // Scan position of the neighbour in the previous line (the scan is a
    // traverse scan, so the previous line is mirrored around roffset).
    int aboveScanPos = roffset - (curPos - roffset + 1);
    aboveScanPos %= buffersize;
    if ((pltScanMode == PLT_SCAN_HORTRAV && curPos < width) || (pltScanMode == PLT_SCAN_VERTRAV && curPos < height))
    {
      aboveScanPos = -1; // first column/row: above row is not valid
    }

    // 1st state: same as previous scanned sample
    // 2nd state: Copy_Above mode
    // 3rd state: Index mode
    // Loop of current state
    for (int curState = 0; curState < NUM_TRELLIS_STATE; curState++)
    {
      double minRdCost = MAX_DOUBLE;
      int minState = 0; // best prevState
      uint8_t bestRunIndex = 0;
      bool bestRunType = 0;
      bool bestPrevCodedType = 0;
      int bestPrevCodedPos = 0;

      if ((curState == 0 && curPos == 0) || (curState == 1 && aboveScanPos < 0)) // state not available
      {
        m_stateCostRDOQ[1 - refId][curState] = MAX_DOUBLE;
        continue;
      }

      bool runType = 0;
      uint8_t runIndex = 0;
      if (curState == 1) // 2nd state: Copy_Above mode
      {
        runType = PLT_RUN_COPY;
      }
      else if (curState == 2) // 3rd state: Index mode
      {
        runType = PLT_RUN_INDEX;
        runIndex = m_minErrorIndexMap[currRasterPos];
      }

      // Loop of previous state
      for (int stateID = 0; stateID < NUM_TRELLIS_STATE; stateID++)
      {
        if (m_stateCostRDOQ[refId][stateID] == MAX_DOUBLE)
        {
          continue; // predecessor state was not available
        }
        if (curState == 0) // 1st state: same as previous scanned sample
        {
          runType = m_runMapRDOQ[refId][stateID][prevScanPos];
          runIndex = (runType == PLT_RUN_INDEX) ? m_indexMapRDOQ[refId][stateID][prevScanPos] : m_indexMapRDOQ[refId][stateID][aboveScanPos];
        }
        else if (curState == 1) // 2nd state: Copy_Above mode
        {
          runIndex = m_indexMapRDOQ[refId][stateID][aboveScanPos];
        }

        bool prevRunType = m_runMapRDOQ[refId][stateID][prevScanPos];
        uint8_t prevRunIndex = m_indexMapRDOQ[refId][stateID][prevScanPos];
        uint8_t aboveRunIndex = (aboveScanPos >= 0) ? m_indexMapRDOQ[refId][stateID][aboveScanPos] : 0;
        int dist = curPos - m_prevRunPosRDOQ[refId][stateID] - 1; // positions since the last coded run position
        double rdCost = m_stateCostRDOQ[refId][stateID];
        if (rdCost >= minRdCost)
        {
          continue; // cannot improve: the cost added below is never used to lower rdCost
        }

        // Work on copies: rateDistOptPLT() updates them through references and
        // they must only be committed if this predecessor wins.
        bool prevCodedRunType = m_prevRunTypeRDOQ[refId][stateID];
        int prevCodedPos = m_prevRunPosRDOQ[refId][stateID];
        const BinFracBits* fracBitsPt = (m_prevRunTypeRDOQ[refId][stateID] == PLT_RUN_INDEX) ? fracBitsPltIndexINDEX : fracBitsPltIndexCOPY;
        rdCost += rateDistOptPLT(runType, runIndex, prevRunType, prevRunIndex, aboveRunIndex, prevCodedRunType, prevCodedPos, curPos, (pltScanMode == PLT_SCAN_HORTRAV) ? width : height, dist, indexMaxValue, fracBitsPt, fracBitsPltRunType);
        if (rdCost < minRdCost) // update minState ( minRdCost )
        {
          minRdCost = rdCost;
          minState = stateID;
          bestRunType = runType;
          bestRunIndex = runIndex;
          bestPrevCodedType = prevCodedRunType;
          bestPrevCodedPos = prevCodedPos;
        }
      }

      // Update trellis info of current state
      m_stateCostRDOQ[1 - refId][curState] = minRdCost;
      m_prevRunTypeRDOQ[1 - refId][curState] = bestPrevCodedType;
      m_prevRunPosRDOQ[1 - refId][curState] = bestPrevCodedPos;
      m_statePtRDOQ[curState][currRasterPos] = minState;

      // Inherit the history of the winning predecessor, then append the
      // decision for the current position.
      int buffer2update = std::min(buffersize, curPos);
      memcpy(m_indexMapRDOQ[1 - refId][curState], m_indexMapRDOQ[refId][minState], sizeof(uint8_t) * buffer2update);
      memcpy(m_runMapRDOQ[1 - refId][curState], m_runMapRDOQ[refId][minState], sizeof(bool) * buffer2update);
      m_indexMapRDOQ[1 - refId][curState][currScanPos] = bestRunIndex;
      m_runMapRDOQ[1 - refId][curState][currScanPos] = bestRunType;
    }

    if (useRotate) // early terminate: Rd cost >= min cost in horizontal scan
    {
      if ((m_stateCostRDOQ[1 - refId][0] >= minCost) &&
          (m_stateCostRDOQ[1 - refId][1] >= minCost) &&
          (m_stateCostRDOQ[1 - refId][2] >= minCost))
      {
        return false;
      }
    }
    refId = 1 - refId; // the buffer just written becomes the reference
  }
  return true;
}
// Estimates the rate-distortion cost of coding one palette sample at scanPos
// with the given run type / index, relative to the previous sample.
//
// Parameters:
//   runType, runIndex        - candidate run type and palette index for this sample
//   prevRunType, prevRunIndex- run type and index of the previously scanned sample
//   aboveRunIndex            - index of the neighbour in the previous scan line
//   prevCodedRunType,
//   prevCodedPos             - in/out: overwritten with runType / scanPos when the
//                              sample differs from the previous one or scanPos == 0
//   scanPos                  - scan position of the sample
//   width                    - run-type bits are only counted for scanPos >= width
//   dist                     - selects the entry of IndexfracBits (clamped to RUN_IDX_THRE)
//   indexMaxValue            - symbol range for the truncated-binary index cost
//   IndexfracBits            - bit estimates for the "same as previous" flag
//   TypefracBits             - bit estimates for the run-type flag
//
// Returns MAX_DOUBLE when an index would have to be coded that equals the
// reference index (not representable); otherwise the accumulated cost.
double IntraSearch::rateDistOptPLT(
  bool runType,
  uint8_t runIndex,
  bool prevRunType,
  uint8_t prevRunIndex,
  uint8_t aboveRunIndex,
  bool& prevCodedRunType,
  int& prevCodedPos,
  int scanPos,
  uint32_t width,
  int dist,
  int indexMaxValue,
  const BinFracBits* IndexfracBits,
  const BinFracBits& TypefracBits)
{
  double rdCost = 0.0;
  // True when the sample continues the previous one: same run type and, for
  // index runs, also the same index.
  bool identityFlag = !( (runType != prevRunType) || ( (runType == PLT_RUN_INDEX) && (runIndex != prevRunIndex) ) );

  if ( ( !identityFlag && runType == PLT_RUN_INDEX ) || scanPos == 0 ) // encode index value
  {
    uint8_t refIndex = (prevRunType == PLT_RUN_INDEX) ? prevRunIndex : aboveRunIndex;
    refIndex = (scanPos == 0) ? ( indexMaxValue + 1) : refIndex;
    if ( runIndex == refIndex )
    {
      rdCost = MAX_DOUBLE;
      return rdCost;
    }
    rdCost += m_pcRdCost->getLambda()*(m_truncBinBits[(runIndex > refIndex) ? runIndex - 1 : runIndex][(scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue] << SCALE_BITS);
  }

  // Distortion of representing the sample with runIndex; applies to every
  // sample, whether or not an index is explicitly coded.
  rdCost += m_indexError[runIndex][m_scanOrder[scanPos].idx] * (1 << SCALE_BITS);

  if (scanPos > 0)
  {
    // Cost of the flag signalling whether the sample equals the previous one.
    rdCost += m_pcRdCost->getLambda()*( identityFlag ? (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[1]) : (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[0] ) );
  }
  if ( !identityFlag && scanPos >= width && prevRunType != PLT_RUN_COPY )
  {
    rdCost += m_pcRdCost->getLambda()*TypefracBits.intBits[runType];
  }
  if (!identityFlag || scanPos == 0)
  {
    prevCodedRunType = runType;
    prevCodedPos = scanPos;
  }
  return rdCost;
}
// Counts the bins produced by the escalating-prefix loop below for `symbol`,
// starting with parameter `count`: one bin for every prefix step (each step
// removes 2^count from the symbol and increments count), one terminating bin,
// and finally `count` suffix bins.
//
// The shifts are done on an unsigned operand and the loop is bounded by
// count < 32, so no out-of-range shift can occur even when asserts are
// compiled out. The result is asserted to be at most 32.
uint32_t IntraSearch::getEpExGolombNumBins(uint32_t symbol, uint32_t count)
{
  uint32_t numBins = 0;
  while (count < 32 && symbol >= (uint32_t(1) << count))
  {
    numBins++;
    symbol -= uint32_t(1) << count;
    count++;
  }
  numBins++;        // terminating prefix bin
  numBins += count; // suffix bins
  assert(numBins <= 32);
  return numBins;
}
// Returns the number of bits used for `symbol` when the symbol alphabet has
// `maxSymbol` entries: the first (2^k - (maxSymbol - 2^k)) symbols take k bits
// and the remaining ones take k + 1 bits, where 2^k is asserted to be the
// largest power of two not exceeding maxSymbol.
uint32_t IntraSearch::getTruncBinBits(uint32_t symbol, uint32_t maxSymbol)
{
  uint32_t log2Floor;
  if (maxSymbol <= 256)
  {
    // Small alphabets: exponent comes from the lookup table.
    log2Floor = g_tbMax[maxSymbol];
  }
  else
  {
    // Large alphabets: count how many powers of two, starting at 2^8,
    // do not exceed maxSymbol.
    log2Floor = 7;
    for (uint32_t pow2 = 1 << 8; pow2 <= maxSymbol; pow2 <<= 1)
    {
      log2Floor++;
    }
  }

  const uint32_t pow2Floor = 1 << log2Floor;
  assert(pow2Floor <= maxSymbol);
  assert((pow2Floor << 1) > maxSymbol);
  assert(symbol < maxSymbol);

  const uint32_t excess = maxSymbol - pow2Floor;
  assert(excess < pow2Floor);

  // Symbols below the threshold use the short code, the rest one bit more.
  return (symbol < pow2Floor - excess) ? log2Floor : log2Floor + 1;
}
// Fills the bit-count lookup tables:
//   m_truncBinBits[j][i] = getTruncBinBits(j, i) for every j < i <= m_symbolSize
//                          (all other entries are zeroed first);
//   m_escapeNumBins[i]   = getEpExGolombNumBins(i, 5) for every i < m_symbolSize.
// bitDepth is not used by this body.
void IntraSearch::initTBCTable(int bitDepth)
{
  for (uint32_t i = 0; i < m_symbolSize; i++)
  {
    memset(m_truncBinBits[i], 0, sizeof(uint16_t)*(m_symbolSize + 1));
  }
  // i is the alphabet size, j the symbol; only j < i is a valid combination.
  for (uint32_t i = 0; i < (m_symbolSize + 1); i++)
  {
    for (uint32_t j = 0; j < i; j++)
    {
      m_truncBinBits[j][i] = getTruncBinBits(j, i);
    }
  }
  memset(m_escapeNumBins, 0, sizeof(uint16_t)*m_symbolSize);
  for (uint32_t i = 0; i < m_symbolSize; i++)
  {
    m_escapeNumBins[i] = getEpExGolombNumBins(i, 5);
  }
}
void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
TransformUnit &tu = *cs.getTU(partitioner.chType);
bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());
CPelBuf orgBuf[3];
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
CompArea area = cu.blocks[comp];
if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
{
orgBuf[comp] = cs.getPredBuf(area);
}
else
{
orgBuf[comp] = cs.getOrgBuf(area);
}
}
int qp[3];
int qpRem[3];
int qpPer[3];
int quantiserScale[3];
int quantiserRightShift[3];
int rightShiftOffset[3];
int invquantiserRightShift[3];
if (!lossless)
{
for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
{
QpParam cQP(tu, ComponentID(ch));
qp[ch] = cQP.Qp(true);
qpRem[ch] = qp[ch] % 6;
qpPer[ch] = qp[ch] / 6;
quantiserScale[ch] = g_quantScales[0][qpRem[ch]];
quantiserRightShift[ch] = QUANT_SHIFT + qpPer[ch];
rightShiftOffset[ch] = 1 << (quantiserRightShift[ch] - 1);
invquantiserRightShift[ch] = IQUANT_SHIFT;
add[ch] = 1 << (invquantiserRightShift[ch] - 1);
}
uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
{
const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)ch));
CompArea area = cu.blocks[ch];
PelBuf recBuf = cs.getRecoBuf(area);
PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)ch);
if (compBegin != COMPONENT_Y || ch == 0)
{
if (lossless)
{
escapeValue.at(xPos, yPos) = orgBuf[ch].at(xPos, yPos);
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
recBuf.at(xPos, yPos) = orgBuf[ch].at(xPos, yPos);
#else
recBuf.at(xPos, yPos) = escapeValue.at(xPos, yPos);
}
else
{
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
escapeValue.at(xPos, yPos) = std::max<TCoeff>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]));
assert(escapeValue.at(xPos, yPos) < (TCoeff(1) << (channelBitDepth + 1)));
TCoeff value = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
recBuf.at(xPos, yPos) = Pel(ClipBD<TCoeff>(value, channelBitDepth));//to be checked
#else
escapeValue.at(xPos, yPos) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
assert(escapeValue.at(xPos, yPos) < (1 << (channelBitDepth + 1)));
recBuf.at(xPos, yPos) = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
recBuf.at(xPos, yPos) = Pel(ClipBD<int>(recBuf.at(xPos, yPos), channelBitDepth));//to be checked
else if (compBegin == COMPONENT_Y && ch > 0 && yPos % (1 << scaleY) == 0 && xPos % (1 << scaleX) == 0)
uint32_t yPosC = yPos >> scaleY;
uint32_t xPosC = xPos >> scaleX;
if (lossless)
{
escapeValue.at(xPosC, yPosC) = orgBuf[ch].at(xPosC, yPosC);
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
recBuf.at(xPosC, yPosC) = orgBuf[ch].at(xPosC, yPosC);
#else
recBuf.at(xPosC, yPosC) = escapeValue.at(xPosC, yPosC);
}
else
{
#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT_VS
escapeValue.at(xPosC, yPosC) = std::max<TCoeff>(
0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]));
assert(escapeValue.at(xPosC, yPosC) < (TCoeff(1) << (channelBitDepth + 1)));
TCoeff value = (((escapeValue.at(xPosC, yPosC) * g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch])
>> invquantiserRightShift[ch];
recBuf.at(xPosC, yPosC) = Pel(ClipBD<TCoeff>(value, channelBitDepth)); // to be checked
escapeValue.at(xPosC, yPosC) = TCoeff(std::max<int>(
0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
assert(escapeValue.at(xPosC, yPosC) < (1 << (channelBitDepth + 1)));
recBuf.at(xPosC, yPosC) =
(((escapeValue.at(xPosC, yPosC) * g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch])
>> invquantiserRightShift[ch];
recBuf.at(xPosC, yPosC) = Pel(ClipBD<int>(recBuf.at(xPosC, yPosC), channelBitDepth)); // to be checked
}
}
Yung-Hsuan Chao (Jessie)
committed
}
void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
const int channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA);
const int channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA);
bool lossless = (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && cs.slice->isLossless());
int pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH);
int pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH);
if (lossless)
{
pcmShiftRight_L = 0;
pcmShiftRight_C = 0;
}
int maxPltSize = cu.isSepTree() ? MAXPLTSIZE_DUALTREE : MAXPLTSIZE;
#else
int maxPltSize = CS::isDualITree(cs) ? MAXPLTSIZE_DUALTREE : MAXPLTSIZE;
#endif
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
CPelBuf orgBuf[3];
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
CompArea area = cu.blocks[comp];
if (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
{
orgBuf[comp] = cs.getPredBuf(area);
}
else
{
orgBuf[comp] = cs.getOrgBuf(area);
}
}
TransformUnit &tu = *cs.getTU(partitioner.chType);
QpParam cQP(tu, compBegin);
int qp = cQP.Qp(true) - 12;
qp = (qp < 0) ? 0 : ((qp > 56) ? 56 : qp);
int errorLimit = g_paletteQuant[qp];
if (lossless)
{
errorLimit = 0;
}
uint32_t totalSize = height*width;
SortingElement *pelList = new SortingElement[totalSize];
SortingElement element;
SortingElement *pelListSort = new SortingElement[MAXPLTSIZE + 1];
uint32_t dictMaxSize = maxPltSize;
int last = -1;
uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
for (uint32_t y = 0; y < height; y++)
{
for (uint32_t x = 0; x < width; x++)
{
uint32_t org[3], pX, pY;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
pX = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
pY = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
org[comp] = orgBuf[comp].at(pX, pY);
}
element.setAll(org, compBegin, numComp);
ComponentID tmpCompBegin = compBegin;
int tmpNumComp = numComp;
if( cs.sps->getChromaFormatIdc() != CHROMA_444 &&
numComp == 3 &&
(x != ((x >> scaleX) << scaleX) || (y != ((y >> scaleY) << scaleY))) )
{
tmpCompBegin = COMPONENT_Y;
tmpNumComp = 1;
}
int besti = last, bestSAD = (last == -1) ? MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
if (lossless)
{
if (bestSAD)
{
for (int i = idx - 1; i >= 0; i--)
{
uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
if (sad == 0)
{
bestSAD = sad;
besti = i;
break;
}
}
}
}
else
{
for (int i = idx - 1; i >= 0; i--)
uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless);
if (sad < bestSAD)
{
bestSAD = sad;
besti = i;
if (!sad)
{
break;
}
}
if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), tmpCompBegin, tmpNumComp, lossless))
{
pelList[besti].addElement(element, tmpCompBegin, tmpNumComp);
last = besti;
}
else
{
pelList[idx].copyDataFrom(element, tmpCompBegin, tmpNumComp);
for (int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++)
pelList[idx].setCnt(1, comp);
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
last = idx;
idx++;
}
}
}
if( cs.sps->getChromaFormatIdc() != CHROMA_444 && numComp == 3 )
{
for( int i = 0; i < idx; i++ )
{
pelList[i].setCnt( pelList[i].getCnt(COMPONENT_Y) + (pelList[i].getCnt(COMPONENT_Cb) >> 2), MAX_NUM_COMPONENT);
}
}
else
{
if( compBegin == 0 )
{
for( int i = 0; i < idx; i++ )
{
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Y), COMPONENT_Cb);
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Y), COMPONENT_Cr);
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Y), MAX_NUM_COMPONENT);
}
}
else
{
for( int i = 0; i < idx; i++ )
{
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Cb), COMPONENT_Y);
pelList[i].setCnt(pelList[i].getCnt(COMPONENT_Cb), MAX_NUM_COMPONENT);
}
}
}
for (int i = 0; i < dictMaxSize; i++)
pelListSort[i].setCnt(0, COMPONENT_Y);
pelListSort[i].setCnt(0, COMPONENT_Cb);
pelListSort[i].setCnt(0, COMPONENT_Cr);
pelListSort[i].setCnt(0, MAX_NUM_COMPONENT);
pelListSort[i].resetAll(compBegin, numComp);
}
//bubble sorting
dictMaxSize = 1;
for (int i = 0; i < idx; i++)
if( pelList[i].getCnt(MAX_NUM_COMPONENT) > pelListSort[dictMaxSize - 1].getCnt(MAX_NUM_COMPONENT) )
{
int j;
for (j = dictMaxSize; j > 0; j--)
if (pelList[i].getCnt(MAX_NUM_COMPONENT) > pelListSort[j - 1].getCnt(MAX_NUM_COMPONENT))
pelListSort[j].copyAllFrom(pelListSort[j - 1], compBegin, numComp);
dictMaxSize = std::min(dictMaxSize + 1, (uint32_t)maxPltSize);
}
else
{
break;
}
}
pelListSort[j].copyAllFrom(pelList[i], compBegin, numComp);
}
}
uint64_t numColorBits = 0;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
numColorBits += (comp > 0) ? channelBitDepth_C : channelBitDepth_L;
}
const int plt_lambda_shift = (compBegin > 0) ? pcmShiftRight_C : pcmShiftRight_L;
double bitCost = m_pcRdCost->getLambda() / (double) (1 << (2 * plt_lambda_shift)) * numColorBits;
bool reuseflag[MAXPLTPREDSIZE] = { false };
int run;
double reuseflagCost;
for (int i = 0; i < maxPltSize; i++)
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
{
if( pelListSort[i].getCnt(MAX_NUM_COMPONENT) )
{
ComponentID tmpCompBegin = compBegin;
int tmpNumComp = numComp;
if( cs.sps->getChromaFormatIdc() != CHROMA_444 && numComp == 3 && pelListSort[i].getCnt(COMPONENT_Cb) == 0 )
{
tmpCompBegin = COMPONENT_Y;
tmpNumComp = 1;
}
for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
{
int half = pelListSort[i].getCnt(comp) >> 1;
cu.curPLT[comp][paletteSize] = (pelListSort[i].getSumData(comp) + half) / pelListSort[i].getCnt(comp);
}
int best = -1;
if( errorLimit )
{
double pal[MAX_NUM_COMPONENT], err = 0.0, bestCost = 0.0;
for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
{
pal[comp] = pelListSort[i].getSumData(comp) / (double)pelListSort[i].getCnt(comp);
err = pal[comp] - cu.curPLT[comp][paletteSize];
if( isChroma((ComponentID) comp) )
{
bestCost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C)) * pelListSort[i].getCnt(comp);
}
else
{
bestCost += (err * err) / (1 << (2 * pcmShiftRight_L)) * pelListSort[i].getCnt(comp);
}
}
bestCost += bitCost;
for( int t = 0; t < cs.prevPLT.curPLTSize[compBegin]; t++ )
{
double cost = 0.0;
for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
{
err = pal[comp] - cs.prevPLT.curPLT[comp][t];
if( isChroma((ComponentID) comp) )
{
cost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C)) * pelListSort[i].getCnt(comp);
}
else
{
cost += (err * err) / (1 << (2 * pcmShiftRight_L)) * pelListSort[i].getCnt(comp);
}
}
run = 0;
for (int t2 = t; t2 >= 0; t2--)
{
if (!reuseflag[t2])
{
run++;
}
else
{
break;
}
}
reuseflagCost = m_pcRdCost->getLambda() / (double)(1 << (2 * plt_lambda_shift)) * getEpExGolombNumBins(run ? run + 1 : run, 0);
cost += reuseflagCost;
if( cost < bestCost )
{
best = t;
bestCost = cost;
}
}
if( best != -1 )
{
for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
{
cu.curPLT[comp][paletteSize] = cs.prevPLT.curPLT[comp][best];
}
reuseflag[best] = true;
}
}
bool duplicate = false;
if( pelListSort[i].getCnt(MAX_NUM_COMPONENT) == 1 && best == -1 )
{
duplicate = true;
}
else
{
for( int t = 0; t < paletteSize; t++ )
{
bool duplicateTmp = true;
for( int comp = tmpCompBegin; comp < (tmpCompBegin + tmpNumComp); comp++ )
{
duplicateTmp = duplicateTmp && (cu.curPLT[comp][paletteSize] == cu.curPLT[comp][t]);
}
if( duplicateTmp )
{
duplicate = true;
break;
}
}
}
if( !duplicate )
{
if( cs.sps->getChromaFormatIdc() != CHROMA_444 && numComp == 3 && pelListSort[i].getCnt(COMPONENT_Cb) == 0 )
{
if( best != -1 )
{
cu.curPLT[COMPONENT_Cb][paletteSize] = cs.prevPLT.curPLT[COMPONENT_Cb][best];
cu.curPLT[COMPONENT_Cr][paletteSize] = cs.prevPLT.curPLT[COMPONENT_Cr][best];
}
else
{
cu.curPLT[COMPONENT_Cb][paletteSize] = 1 << (channelBitDepth_C - 1);
cu.curPLT[COMPONENT_Cr][paletteSize] = 1 << (channelBitDepth_C - 1);
}
}
paletteSize++;
}
}
else
{
break;
}
}
cu.curPLTSize[compBegin] = paletteSize;
cu.curPLTSize[COMPONENT_Y] = paletteSize;
delete[] pelList;
delete[] pelListSort;
Yung-Hsuan Chao (Jessie)
committed
}

Karsten Suehring
committed
// -------------------------------------------------------------------------------------------------------------------
// Intra search
// -------------------------------------------------------------------------------------------------------------------
void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx )

Karsten Suehring
committed
{
CodingUnit &cu = *cs.getCU( partitioner.chType );
if (bLuma)
{
bool isFirst = cu.ispMode ? subTuIdx == 0 : partitioner.currArea().lumaPos() == cs.area.lumaPos();

Karsten Suehring
committed
// CU header
if( isFirst )
{
if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag() || cs.slice->getSPS()->getPLTMode())

Karsten Suehring
committed
{
m_CABACEstimator->cu_skip_flag( cu );
m_CABACEstimator->pred_mode ( cu );
}
#if ENABLE_DIMD
m_CABACEstimator->cu_dimd_flag(cu);
#endif
if (CU::isPLT(cu))
{
return;
}

Karsten Suehring
committed
}
PredictionUnit &pu = *cs.getPU(partitioner.currArea().lumaPos(), partitioner.chType);
// luma prediction mode

Karsten Suehring
committed
{
m_CABACEstimator->bdpcm_mode( cu, COMPONENT_Y );

Karsten Suehring
committed
}
}
if (bChroma)
{
bool isFirst = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos();
PredictionUnit &pu = *cs.getPU( partitioner.currArea().chromaPos(), CHANNEL_TYPE_CHROMA );

Karsten Suehring
committed
{
m_CABACEstimator->bdpcm_mode( cu, ComponentID(CHANNEL_TYPE_CHROMA) );

Karsten Suehring
committed
}
}
}
void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx, const PartSplit ispType )
{
const UnitArea &currArea = partitioner.currArea();
int subTuCounter = subTuIdx;
TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter );
CodingUnit &currCU = *currTU.cu;

Karsten Suehring
committed
uint32_t currDepth = partitioner.currTrDepth;
const bool subdiv = currTU.depth > currDepth;
ComponentID compID = partitioner.chType == CHANNEL_TYPE_LUMA ? COMPONENT_Y : COMPONENT_Cb;

Karsten Suehring
committed

Karsten Suehring
committed
{
CHECK( !subdiv, "TU split implied" );
}
else
{
CHECK( subdiv && !currCU.ispMode && isLuma( compID ), "No TU subdivision is allowed with QTBT" );
}
if (bChroma)
{
const bool chromaCbfISP = currArea.blocks[COMPONENT_Cb].valid() && currCU.ispMode && !subdiv;
if ( !currCU.ispMode || chromaCbfISP )

Karsten Suehring
committed
{
const uint32_t numberValidComponents = getNumberValidComponents(currArea.chromaFormat);
const uint32_t cbfDepth = (chromaCbfISP ? currDepth - 1 : currDepth);

Karsten Suehring
committed
for (uint32_t ch = COMPONENT_Cb; ch < numberValidComponents; ch++)

Karsten Suehring
committed
{
const ComponentID compID = ComponentID(ch);

Karsten Suehring
committed
if (currDepth == 0 || TU::getCbfAtDepth(currTU, compID, currDepth - 1) || chromaCbfISP)
{
const bool prevCbf = (compID == COMPONENT_Cr ? TU::getCbfAtDepth(currTU, COMPONENT_Cb, currDepth) : false);
m_CABACEstimator->cbf_comp(cs, TU::getCbfAtDepth(currTU, compID, currDepth), currArea.blocks[compID],
cbfDepth, prevCbf);
}

Karsten Suehring
committed
}
}
}
if (subdiv)
{
if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
{
partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
}
else if( currCU.ispMode && isLuma( compID ) )
{
partitioner.splitCurrArea( ispType, cs );
}

Karsten Suehring
committed
else
{
THROW("Cannot perform an implicit split!");
}

Karsten Suehring
committed
do
{
xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuCounter, ispType );
subTuCounter += subTuCounter != -1 ? 1 : 0;

Karsten Suehring
committed
} while( partitioner.nextPart( cs ) );
partitioner.exitCurrSplit();
}
else
{
//===== Cbfs =====
if (bLuma)
{
bool previousCbf = false;
bool lastCbfIsInferred = false;
if( ispType != TU_NO_ISP )
{
bool rootCbfSoFar = false;
uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2(currTU.lheight()) : currCU.lwidth() >> floorLog2(currTU.lwidth());
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
if( subTuCounter == nTus - 1 )
{
TransformUnit* tuPointer = currCU.firstTU;
for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
{
rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, currDepth );
tuPointer = tuPointer->next;
}
if( !rootCbfSoFar )
{
lastCbfIsInferred = true;
}
}
if( !lastCbfIsInferred )
{
previousCbf = TU::getPrevTuCbfAtDepth( currTU, COMPONENT_Y, partitioner.currTrDepth );
}
}
if( !lastCbfIsInferred )
{
m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode );
}

Karsten Suehring
committed
}
}
}
void IntraSearch::xEncCoeffQT( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID, const int subTuIdx, const PartSplit ispType, CUCtx* cuCtx )

Karsten Suehring
committed
{
const UnitArea &currArea = partitioner.currArea();
int subTuCounter = subTuIdx;
TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuIdx );

Karsten Suehring
committed
uint32_t currDepth = partitioner.currTrDepth;
const bool subdiv = currTU.depth > currDepth;
if (subdiv)
{
if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs))
{
partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs);