[x264-devel] commit: Faster 2x2 chroma DC dequant (Henrik Gramner)
git version control
git at videolan.org
Mon Feb 15 10:20:21 CET 2010
x264 | branch: master | Henrik Gramner <hengar-6 at student.ltu.se> | Mon Feb 8 15:53:52 2010 -0800| [16a1fddbd43ec2352a6cb44c7890c38c6535b726] | committer: Jason Garrett-Glaser
Faster 2x2 chroma DC dequant
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=16a1fddbd43ec2352a6cb44c7890c38c6535b726
---
doc/standards.txt | 1 +
encoder/macroblock.c | 24 +++++++++---------------
2 files changed, 10 insertions(+), 15 deletions(-)
diff --git a/doc/standards.txt b/doc/standards.txt
index db9a691..7474d8f 100644
--- a/doc/standards.txt
+++ b/doc/standards.txt
@@ -4,6 +4,7 @@ checkasm is written in gcc, with no attempt at compatibility with anything else.
We make the following additional assumptions which are true of real systems but not guaranteed by C99:
* Two's complement.
* Signed right-shifts are sign-extended.
+* int is 32-bit or larger.
x86-specific assumptions:
* The stack is 16-byte aligned. We align it on entry to libx264 and on entry to any thread, but the compiler must preserve alignment after that.
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index fa7942d..f67a898 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -42,30 +42,24 @@ static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[4] )
int d1 = dct[2] + dct[3]; \
int d2 = dct[0] - dct[1]; \
int d3 = dct[2] - dct[3]; \
- int dmf = dequant_mf[i_qp%6][0]; \
- int qbits = i_qp/6 - 5; \
- if( qbits > 0 ) \
- { \
- dmf <<= qbits; \
- qbits = 0; \
- }
+ int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
static inline void idct_dequant_2x2_dc( int16_t dct[4], int16_t dct4x4[4][16], int dequant_mf[6][16], int i_qp )
{
IDCT_DEQUANT_START
- dct4x4[0][0] = (d0 + d1) * dmf >> -qbits;
- dct4x4[1][0] = (d0 - d1) * dmf >> -qbits;
- dct4x4[2][0] = (d2 + d3) * dmf >> -qbits;
- dct4x4[3][0] = (d2 - d3) * dmf >> -qbits;
+ dct4x4[0][0] = (d0 + d1) * dmf >> 5;
+ dct4x4[1][0] = (d0 - d1) * dmf >> 5;
+ dct4x4[2][0] = (d2 + d3) * dmf >> 5;
+ dct4x4[3][0] = (d2 - d3) * dmf >> 5;
}
static inline void idct_dequant_2x2_dconly( int16_t out[4], int16_t dct[4], int dequant_mf[6][16], int i_qp )
{
IDCT_DEQUANT_START
- out[0] = (d0 + d1) * dmf >> -qbits;
- out[1] = (d0 - d1) * dmf >> -qbits;
- out[2] = (d2 + d3) * dmf >> -qbits;
- out[3] = (d2 - d3) * dmf >> -qbits;
+ out[0] = (d0 + d1) * dmf >> 5;
+ out[1] = (d0 - d1) * dmf >> 5;
+ out[2] = (d2 + d3) * dmf >> 5;
+ out[3] = (d2 - d3) * dmf >> 5;
}
static inline void dct2x2dc( int16_t d[4], int16_t dct4x4[4][16] )
More information about the x264-devel mailing list