>From 960eca51e6e65e6969f7d829e29ddc2387420733 Mon Sep 17 00:00:00 2001 From: Zeng Zhaoxiu <zhaoxiu.z...@gmail.com> Date: Sat, 14 Feb 2015 19:46:51 +0800 Subject: [PATCH 2/5] avcodec/vc1: optimize block functions
--- libavcodec/vc1_block.c | 352 ++++++++++++++++++++----------------------------- 1 file changed, 142 insertions(+), 210 deletions(-) diff --git a/libavcodec/vc1_block.c b/libavcodec/vc1_block.c index aa62ec2..4976685 100644 --- a/libavcodec/vc1_block.c +++ b/libavcodec/vc1_block.c @@ -585,7 +585,7 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, int i; int16_t *dc_val; int16_t *ac_val, *ac_val2; - int dcdiff; + int dcdiff, scale; /* Get DC differential */ if (n < 4) { @@ -598,16 +598,12 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, return -1; } if (dcdiff) { + int m = (v->pq == 1 || v->pq == 2) ? 3 - v->pq : 0; if (dcdiff == 119 /* ESC index value */) { - /* TODO: Optimize */ - if (v->pq == 1) dcdiff = get_bits(gb, 10); - else if (v->pq == 2) dcdiff = get_bits(gb, 9); - else dcdiff = get_bits(gb, 8); + dcdiff = get_bits(gb, 8 + m); } else { - if (v->pq == 1) - dcdiff = (dcdiff << 2) + get_bits(gb, 2) - 3; - else if (v->pq == 2) - dcdiff = (dcdiff << 1) + get_bits1(gb) - 1; + if (m) + dcdiff = (dcdiff << m) + get_bits(gb, m) - (2 * m - 1); } if (get_bits1(gb)) dcdiff = -dcdiff; @@ -623,21 +619,23 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, } else { block[0] = dcdiff * s->c_dc_scale; } - /* Skip ? 
*/ - if (!coded) { - goto not_coded; - } - // AC Decoding - i = 1; + ac_val = s->ac_val[0][0] + s->block_index[n] * 16; + ac_val2 = ac_val; + if (dc_pred_dir) // left + ac_val -= 16; + else // top + ac_val -= 16 * s->block_wrap[n]; - { + scale = v->pq * 2 + v->halfpq; + + if (coded) { int last = 0, skip, value; const uint8_t *zz_table; - int scale; int k; - scale = v->pq * 2 + v->halfpq; + // AC Decoding + i = 1; if (v->s.ac_pred) { if (!dc_pred_dir) @@ -647,13 +645,6 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, } else zz_table = v->zz_8x8[1]; - ac_val = s->ac_val[0][0] + s->block_index[n] * 16; - ac_val2 = ac_val; - if (dc_pred_dir) // left - ac_val -= 16; - else // top - ac_val -= 16 * s->block_wrap[n]; - while (!last) { vc1_decode_ac_coeff(v, &last, &skip, &value, codingset); i += skip; @@ -664,13 +655,15 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, /* apply AC prediction if needed */ if (s->ac_pred) { + int sh; if (dc_pred_dir) { // left - for (k = 1; k < 8; k++) - block[k << v->left_blk_sh] += ac_val[k]; + sh = v->left_blk_sh; } else { // top - for (k = 1; k < 8; k++) - block[k << v->top_blk_sh] += ac_val[k + 8]; + sh = v->top_blk_sh; + ac_val += 8; } + for (k = 1; k < 8; k++) + block[k << sh] += ac_val[k]; } /* save AC coeffs for further prediction */ for (k = 1; k < 8; k++) { @@ -687,42 +680,31 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, } if (s->ac_pred) i = 63; - } -not_coded: - if (!coded) { - int k, scale; - ac_val = s->ac_val[0][0] + s->block_index[n] * 16; - ac_val2 = ac_val; + } else { + int k; i = 0; - scale = v->pq * 2 + v->halfpq; + memset(ac_val2, 0, 16 * 2); - if (dc_pred_dir) { // left - ac_val -= 16; - if (s->ac_pred) - memcpy(ac_val2, ac_val, 8 * 2); - } else { // top - ac_val -= 16 * s->block_wrap[n]; - if (s->ac_pred) - memcpy(ac_val2 + 8, ac_val + 8, 8 * 2); - } /* apply AC prediction if needed */ if (s->ac_pred) { - if (dc_pred_dir) { //left - for (k = 
1; k < 8; k++) { - block[k << v->left_blk_sh] = ac_val[k] * scale; - if (!v->pquantizer && block[k << v->left_blk_sh]) - block[k << v->left_blk_sh] += (block[k << v->left_blk_sh] < 0) ? -v->pq : v->pq; - } + int sh; + if (dc_pred_dir) { // left + sh = v->left_blk_sh; } else { // top - for (k = 1; k < 8; k++) { - block[k << v->top_blk_sh] = ac_val[k + 8] * scale; - if (!v->pquantizer && block[k << v->top_blk_sh]) - block[k << v->top_blk_sh] += (block[k << v->top_blk_sh] < 0) ? -v->pq : v->pq; - } + sh = v->top_blk_sh; + ac_val += 8; + ac_val2 += 8; + } + memcpy(ac_val2, ac_val, 8 * 2); + for (k = 1; k < 8; k++) { + block[k << sh] = ac_val[k] * scale; + if (!v->pquantizer && block[k << sh]) + block[k << sh] += (block[k << sh] < 0) ? -v->pq : v->pq; } + i = 63; } } @@ -766,16 +748,12 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n, return -1; } if (dcdiff) { + int m = (mquant == 1 || mquant == 2) ? 3 - mquant : 0; if (dcdiff == 119 /* ESC index value */) { - /* TODO: Optimize */ - if (mquant == 1) dcdiff = get_bits(gb, 10); - else if (mquant == 2) dcdiff = get_bits(gb, 9); - else dcdiff = get_bits(gb, 8); + dcdiff = get_bits(gb, 8 + m); } else { - if (mquant == 1) - dcdiff = (dcdiff << 2) + get_bits(gb, 2) - 3; - else if (mquant == 2) - dcdiff = (dcdiff << 1) + get_bits1(gb) - 1; + if (m) + dcdiff = (dcdiff << m) + get_bits(gb, m) - (2 * m - 1); } if (get_bits1(gb)) dcdiff = -dcdiff; @@ -792,22 +770,22 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n, block[0] = dcdiff * s->c_dc_scale; } - //AC Decoding - i = 1; - /* check if AC is needed at all */ if (!a_avail && !c_avail) use_pred = 0; - ac_val = s->ac_val[0][0] + s->block_index[n] * 16; - ac_val2 = ac_val; - scale = mquant * 2 + ((mquant == v->pq) ?
v->halfpq : 0); + //AC Decoding + i = 1; + ac_val = s->ac_val[0][0] + s->block_index[n] * 16; + ac_val2 = ac_val; if (dc_pred_dir) // left ac_val -= 16; else // top ac_val -= 16 * s->block_wrap[n]; + scale = mquant * 2 + ((mquant == v->pq) ? v->halfpq : 0); + q1 = s->current_picture.qscale_table[mb_pos]; if ( dc_pred_dir && c_avail && mb_pos) q2 = s->current_picture.qscale_table[mb_pos - 1]; @@ -851,28 +829,24 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n, /* apply AC prediction if needed */ if (use_pred) { + int sh; + if (dc_pred_dir) { // left + sh = v->left_blk_sh; + } else { // top + sh = v->top_blk_sh; + ac_val += 8; + } /* scale predictors if needed*/ if (q2 && q1 != q2) { q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1; q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1; - if (q1 < 1) return AVERROR_INVALIDDATA; - if (dc_pred_dir) { // left - for (k = 1; k < 8; k++) - block[k << v->left_blk_sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; - } else { // top - for (k = 1; k < 8; k++) - block[k << v->top_blk_sh] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; - } + for (k = 1; k < 8; k++) + block[k << sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; } else { - if (dc_pred_dir) { //left - for (k = 1; k < 8; k++) - block[k << v->left_blk_sh] += ac_val[k]; - } else { //top - for (k = 1; k < 8; k++) - block[k << v->top_blk_sh] += ac_val[k + 8]; - } + for (k = 1; k < 8; k++) + block[k << sh] += ac_val[k]; } } /* save AC coeffs for further prediction */ @@ -890,51 +864,37 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n, } if (use_pred) i = 63; + } else { // no AC coeffs int k; memset(ac_val2, 0, 16 * 2); - if (dc_pred_dir) { // left - if (use_pred) { - memcpy(ac_val2, ac_val, 8 * 2); - if (q2 && q1 != q2) { - q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1; - q2 = q2 * 2 + ((q2 == v->pq) ? 
v->halfpq : 0) - 1; - if (q1 < 1) - return AVERROR_INVALIDDATA; - for (k = 1; k < 8; k++) - ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; - } - } - } else { // top - if (use_pred) { - memcpy(ac_val2 + 8, ac_val + 8, 8 * 2); - if (q2 && q1 != q2) { - q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1; - q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1; - if (q1 < 1) - return AVERROR_INVALIDDATA; - for (k = 1; k < 8; k++) - ac_val2[k + 8] = (ac_val2[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; - } - } - } /* apply AC prediction if needed */ if (use_pred) { + int sh; if (dc_pred_dir) { // left - for (k = 1; k < 8; k++) { - block[k << v->left_blk_sh] = ac_val2[k] * scale; - if (!v->pquantizer && block[k << v->left_blk_sh]) - block[k << v->left_blk_sh] += (block[k << v->left_blk_sh] < 0) ? -mquant : mquant; - } + sh = v->left_blk_sh; } else { // top - for (k = 1; k < 8; k++) { - block[k << v->top_blk_sh] = ac_val2[k + 8] * scale; - if (!v->pquantizer && block[k << v->top_blk_sh]) - block[k << v->top_blk_sh] += (block[k << v->top_blk_sh] < 0) ? -mquant : mquant; - } + sh = v->top_blk_sh; + ac_val += 8; + ac_val2 += 8; + } + memcpy(ac_val2, ac_val, 8 * 2); + if (q2 && q1 != q2) { + q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1; + q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1; + if (q1 < 1) + return AVERROR_INVALIDDATA; + for (k = 1; k < 8; k++) + ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; } + for (k = 1; k < 8; k++) { + block[k << sh] = ac_val2[k] * scale; + if (!v->pquantizer && block[k << sh]) + block[k << sh] += (block[k << sh] < 0) ? -mquant : mquant; + } + i = 63; } } @@ -987,16 +947,12 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n, return -1; } if (dcdiff) { + int m = (mquant == 1 || mquant == 2) ? 
3 - mquant : 0; if (dcdiff == 119 /* ESC index value */) { - /* TODO: Optimize */ - if (mquant == 1) dcdiff = get_bits(gb, 10); - else if (mquant == 2) dcdiff = get_bits(gb, 9); - else dcdiff = get_bits(gb, 8); + dcdiff = get_bits(gb, 8 + m); } else { - if (mquant == 1) - dcdiff = (dcdiff << 2) + get_bits(gb, 2) - 3; - else if (mquant == 2) - dcdiff = (dcdiff << 1) + get_bits1(gb) - 1; + if (m) + dcdiff = (dcdiff << m) + get_bits(gb, m) - (2 * m - 1); } if (get_bits1(gb)) dcdiff = -dcdiff; @@ -1007,7 +963,6 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n, *dc_val = dcdiff; /* Store the quantized DC coeff, used for prediction */ - if (n < 4) { block[0] = dcdiff * s->y_dc_scale; } else { @@ -1023,14 +978,13 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n, if (!a_avail && !c_avail) use_pred = 0; ac_val = s->ac_val[0][0] + s->block_index[n] * 16; ac_val2 = ac_val; - - scale = mquant * 2 + v->halfpq; - if (dc_pred_dir) //left ac_val -= 16; else //top ac_val -= 16 * s->block_wrap[n]; + scale = mquant * 2 + v->halfpq; + q1 = s->current_picture.qscale_table[mb_pos]; if (dc_pred_dir && c_avail && mb_pos) q2 = s->current_picture.qscale_table[mb_pos - 1]; @@ -1044,51 +998,50 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n, if (coded) { int last = 0, skip, value; + const uint8_t *zz_table; int k; + if (v->fcm == PROGRESSIVE) + zz_table = v->zz_8x8[0]; + else { + if (use_pred && (v->fcm == ILACE_FRAME)) { + if (!dc_pred_dir) // top + zz_table = v->zz_8x8[2]; + else // left + zz_table = v->zz_8x8[3]; + } else { + zz_table = v->zzi_8x8; + } + } + while (!last) { vc1_decode_ac_coeff(v, &last, &skip, &value, codingset); i += skip; if (i > 63) break; - if (v->fcm == PROGRESSIVE) - block[v->zz_8x8[0][i++]] = value; - else { - if (use_pred && (v->fcm == ILACE_FRAME)) { - if (!dc_pred_dir) // top - block[v->zz_8x8[2][i++]] = value; - else // left - block[v->zz_8x8[3][i++]] = value; - } else { - 
block[v->zzi_8x8[i++]] = value; - } - } + block[zz_table[i++]] = value; } /* apply AC prediction if needed */ if (use_pred) { + int sh; + if (dc_pred_dir) { // left + sh = v->left_blk_sh; + } else { //top + sh = v->top_blk_sh; + ac_val += 8; + } /* scale predictors if needed*/ if (q2 && q1 != q2) { q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1; q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1; - if (q1 < 1) return AVERROR_INVALIDDATA; - if (dc_pred_dir) { // left - for (k = 1; k < 8; k++) - block[k << v->left_blk_sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; - } else { //top - for (k = 1; k < 8; k++) - block[k << v->top_blk_sh] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; - } + for (k = 1; k < 8; k++) + block[k << sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; } else { - if (dc_pred_dir) { // left - for (k = 1; k < 8; k++) - block[k << v->left_blk_sh] += ac_val[k]; - } else { // top - for (k = 1; k < 8; k++) - block[k << v->top_blk_sh] += ac_val[k + 8]; - } + for (k = 1; k < 8; k++) + block[k << sh] += ac_val[k]; } } /* save AC coeffs for further prediction */ @@ -1106,51 +1059,37 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n, } if (use_pred) i = 63; + } else { // no AC coeffs int k; memset(ac_val2, 0, 16 * 2); - if (dc_pred_dir) { // left - if (use_pred) { - memcpy(ac_val2, ac_val, 8 * 2); - if (q2 && q1 != q2) { - q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1; - q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1; - if (q1 < 1) - return AVERROR_INVALIDDATA; - for (k = 1; k < 8; k++) - ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; - } - } - } else { // top - if (use_pred) { - memcpy(ac_val2 + 8, ac_val + 8, 8 * 2); - if (q2 && q1 != q2) { - q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1; - q2 = q2 * 2 + ((q2 == v->pq) ? 
v->halfpq : 0) - 1; - if (q1 < 1) - return AVERROR_INVALIDDATA; - for (k = 1; k < 8; k++) - ac_val2[k + 8] = (ac_val2[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; - } - } - } /* apply AC prediction if needed */ if (use_pred) { + int sh; if (dc_pred_dir) { // left - for (k = 1; k < 8; k++) { - block[k << v->left_blk_sh] = ac_val2[k] * scale; - if (!v->pquantizer && block[k << v->left_blk_sh]) - block[k << v->left_blk_sh] += (block[k << v->left_blk_sh] < 0) ? -mquant : mquant; - } + sh = v->left_blk_sh; } else { // top - for (k = 1; k < 8; k++) { - block[k << v->top_blk_sh] = ac_val2[k + 8] * scale; - if (!v->pquantizer && block[k << v->top_blk_sh]) - block[k << v->top_blk_sh] += (block[k << v->top_blk_sh] < 0) ? -mquant : mquant; - } + sh = v->top_blk_sh; + ac_val += 8; + ac_val2 += 8; } + memcpy(ac_val2, ac_val, 8 * 2); + if (q2 && q1 != q2) { + q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1; + q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1; + if (q1 < 1) + return AVERROR_INVALIDDATA; + for (k = 1; k < 8; k++) + ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; + } + for (k = 1; k < 8; k++) { + block[k << sh] = ac_val2[k] * scale; + if (!v->pquantizer && block[k << sh]) + block[k << sh] += (block[k << sh] < 0) ? -mquant : mquant; + } + i = 63; } } @@ -1173,6 +1112,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, int scale, off, idx, last, skip, value; int ttblk = ttmb & 7; int pat = 0; + const uint8_t *zz_table; s->bdsp.clear_block(block); @@ -1209,15 +1149,13 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, pat = 0xF; i = 0; last = 0; + zz_table = !v->fcm ? v->zz_8x8[0] : v->zzi_8x8; while (!last) { vc1_decode_ac_coeff(v, &last, &skip, &value, v->codingset2); i += skip; if (i > 63) break; - if (!v->fcm) - idx = v->zz_8x8[0][i++]; - else - idx = v->zzi_8x8[i++]; + idx = zz_table[i++]; block[idx] = value * scale; if (!v->pquantizer) block[idx] += (block[idx] < 0) ? 
-mquant : mquant; @@ -1237,15 +1175,13 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, last = subblkpat & (1 << (3 - j)); i = 0; off = (j & 1) * 4 + (j & 2) * 16; + zz_table = !v->fcm ? ff_vc1_simple_progressive_4x4_zz : ff_vc1_adv_interlaced_4x4_zz; while (!last) { vc1_decode_ac_coeff(v, &last, &skip, &value, v->codingset2); i += skip; if (i > 15) break; - if (!v->fcm) - idx = ff_vc1_simple_progressive_4x4_zz[i++]; - else - idx = ff_vc1_adv_interlaced_4x4_zz[i++]; + idx = zz_table[i++]; block[idx + off] = value * scale; if (!v->pquantizer) block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant; @@ -1260,6 +1196,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, break; case TT_8X4: pat = ~((subblkpat & 2) * 6 + (subblkpat & 1) * 3) & 0xF; + zz_table = !v->fcm ? v->zz_8x4 : ff_vc1_adv_interlaced_8x4_zz; for (j = 0; j < 2; j++) { last = subblkpat & (1 << (1 - j)); i = 0; @@ -1269,10 +1206,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, i += skip; if (i > 31) break; - if (!v->fcm) - idx = v->zz_8x4[i++] + off; - else - idx = ff_vc1_adv_interlaced_8x4_zz[i++] + off; + idx = zz_table[i++] + off; block[idx] = value * scale; if (!v->pquantizer) block[idx] += (block[idx] < 0) ? -mquant : mquant; @@ -1287,6 +1221,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, break; case TT_4X8: pat = ~(subblkpat * 5) & 0xF; + zz_table = !v->fcm ? v->zz_4x8 : ff_vc1_adv_interlaced_4x8_zz; for (j = 0; j < 2; j++) { last = subblkpat & (1 << (1 - j)); i = 0; @@ -1296,10 +1231,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, i += skip; if (i > 31) break; - if (!v->fcm) - idx = v->zz_4x8[i++] + off; - else - idx = ff_vc1_adv_interlaced_4x8_zz[i++] + off; + idx = zz_table[i++] + off; block[idx] = value * scale; if (!v->pquantizer) block[idx] += (block[idx] < 0) ? 
-mquant : mquant; -- 2.1.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel