[x264-devel] commit: Fix high bit depth intra pred functions (Daniel Kang )
git at videolan.org
git at videolan.org
Wed Dec 15 04:19:32 CET 2010
x264 | branch: master | Daniel Kang <daniel.d.kang at gmail.com> | Tue Dec 7 15:19:46 2010 -0500| [5fbb9d62a4ce93b5d36ee337f33990703f46862c] | committer: Jason Garrett-Glaser
Fix high bit depth intra pred functions
And re-enable them accordingly.
Patch from Google Code-In.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=5fbb9d62a4ce93b5d36ee337f33990703f46862c
---
common/x86/predict-a.asm | 52 ++++++++++++++++++++++++++++++++++++++++-----
common/x86/predict-c.c | 10 ++++----
2 files changed, 51 insertions(+), 11 deletions(-)
diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
index c908c3a..e3faba5 100644
--- a/common/x86/predict-a.asm
+++ b/common/x86/predict-a.asm
@@ -168,7 +168,7 @@ cextern pb_reverse
;-----------------------------------------------------------------------------
%macro PREDICT_4x4_DDL 4
cglobal predict_4x4_ddl_%1, 1,1
- mova m1, [r0-FDEC_STRIDEB]
+ movu m1, [r0-FDEC_STRIDEB]
mova m2, m1
mova m3, m1
mova m4, m1
@@ -224,8 +224,21 @@ PREDICT_4x4_DDL mmxext, q , 8, b
;-----------------------------------------------------------------------------
%macro PREDICT_4x4 7
cglobal predict_4x4_ddr_%1, 1,1
- mova m1, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- mova m2, [r0+0*FDEC_STRIDEB-8*SIZEOF_PIXEL]
+ movu m1, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
+ movq m2, [r0+0*FDEC_STRIDEB-8]
+%ifdef HIGH_BIT_DEPTH
+ movh m4, [r0-1*FDEC_STRIDEB-4*SIZEOF_PIXEL]
+ punpckl%2 m2, m4
+ movh m3, [r0-1*FDEC_STRIDEB]
+ punpckh%3 m1, m2
+ PALIGNR m3, m1, 5*SIZEOF_PIXEL, m1
+ mova m1, m3
+ movhps m4, [r0+2*FDEC_STRIDEB-4*SIZEOF_PIXEL]
+ PALIGNR m3, m4, 7*SIZEOF_PIXEL, m4
+ mova m2, m3
+ movhps m4, [r0+3*FDEC_STRIDEB-4*SIZEOF_PIXEL]
+ PALIGNR m3, m4, 7*SIZEOF_PIXEL, m4
+%else
punpckh%2 m2, [r0-1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
movh m3, [r0-1*FDEC_STRIDEB]
punpckh%3 m1, m2
@@ -234,6 +247,7 @@ cglobal predict_4x4_ddr_%1, 1,1
PALIGNR m3, [r0+2*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m4
mova m2, m3
PALIGNR m3, [r0+3*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m4
+%endif
PRED8x8_LOWPASS %5, m0, m3, m1, m2, m4
%assign Y 3
movh [r0+Y*FDEC_STRIDEB], m0
@@ -247,6 +261,19 @@ cglobal predict_4x4_ddr_%1, 1,1
cglobal predict_4x4_vr_%1, 1,1,6*(mmsize/16)
movh m0, [r0-1*FDEC_STRIDEB] ; ........t3t2t1t0
mova m5, m0
+%ifdef HIGH_BIT_DEPTH
+ movhps m1, [r0-1*FDEC_STRIDEB-4*SIZEOF_PIXEL]
+ PALIGNR m0, m1, 7*SIZEOF_PIXEL, m1 ; ......t3t2t1t0lt
+ pavg%5 m5, m0
+ movhps m1, [r0+0*FDEC_STRIDEB-4*SIZEOF_PIXEL]
+ PALIGNR m0, m1, 7*SIZEOF_PIXEL, m1 ; ....t3t2t1t0ltl0
+ mova m1, m0
+ movhps m2, [r0+1*FDEC_STRIDEB-4*SIZEOF_PIXEL]
+ PALIGNR m0, m2, 7*SIZEOF_PIXEL, m2 ; ..t3t2t1t0ltl0l1
+ mova m2, m0
+ movhps m3, [r0+2*FDEC_STRIDEB-4*SIZEOF_PIXEL]
+ PALIGNR m0, m3, 7*SIZEOF_PIXEL, m3 ; t3t2t1t0ltl0l1l2
+%else
PALIGNR m0, [r0-1*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m1 ; ......t3t2t1t0lt
pavg%5 m5, m0
PALIGNR m0, [r0+0*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m1 ; ....t3t2t1t0ltl0
@@ -254,6 +281,7 @@ cglobal predict_4x4_vr_%1, 1,1,6*(mmsize/16)
PALIGNR m0, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m2 ; ..t3t2t1t0ltl0l1
mova m2, m0
PALIGNR m0, [r0+2*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m3 ; t3t2t1t0ltl0l1l2
+%endif
PRED8x8_LOWPASS %5, m3, m1, m0, m2, m4
mova m1, m3
psrl%4 m3, %7*2
@@ -269,12 +297,24 @@ cglobal predict_4x4_vr_%1, 1,1,6*(mmsize/16)
cglobal predict_4x4_hd_%1, 1,1,6*(mmsize/16)
movh m0, [r0-1*FDEC_STRIDEB-4*SIZEOF_PIXEL] ; lt ..
+%ifdef HIGH_BIT_DEPTH
+ movh m1, [r0-1*FDEC_STRIDEB]
+ punpckl%6 m0, m1 ; t3 t2 t1 t0 lt .. .. ..
+ psll%4 m0, %7 ; t2 t1 t0 lt .. .. .. ..
+ movh m1, [r0+3*FDEC_STRIDEB-4*SIZEOF_PIXEL] ; l3
+ movh m2, [r0+2*FDEC_STRIDEB-4*SIZEOF_PIXEL]
+ punpckl%2 m1, m2 ; l2 l3
+ movh m2, [r0+1*FDEC_STRIDEB-4*SIZEOF_PIXEL] ; l1
+ movh m3, [r0+0*FDEC_STRIDEB-4*SIZEOF_PIXEL]
+ punpckl%2 m2, m3 ; l0 l1
+%else
punpckl%6 m0, [r0-1*FDEC_STRIDEB] ; t3 t2 t1 t0 lt .. .. ..
psll%4 m0, %7 ; t2 t1 t0 lt .. .. .. ..
- mova m1, [r0+3*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l3
+ movu m1, [r0+3*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l3
punpckh%2 m1, [r0+2*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l2 l3
- mova m2, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l1
+ movu m2, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l1
punpckh%2 m2, [r0+0*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l0 l1
+%endif
punpckh%3 m1, m2 ; l0 l1 l2 l3
punpckh%6 m1, m0 ; t2 t1 t0 lt l0 l1 l2 l3
mova m0, m1
@@ -378,7 +418,7 @@ cglobal predict_4x4_hu_mmxext, 1,1
;-----------------------------------------------------------------------------
%macro PREDICT_4x4_V1 4
cglobal predict_4x4_vl_%1, 1,1,6*(mmsize/16)
- mova m1, [r0-FDEC_STRIDEB]
+ movu m1, [r0-FDEC_STRIDEB]
mova m3, m1
mova m2, m1
psrl%2 m3, %3
diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
index 994e05f..829a191 100644
--- a/common/x86/predict-c.c
+++ b/common/x86/predict-c.c
@@ -505,14 +505,14 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
#if HIGH_BIT_DEPTH
if( !(cpu&X264_CPU_SSE2) )
return;
-// pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
+ pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_sse2;
-// pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_sse2;
+ pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_sse2;
if( !(cpu&X264_CPU_SSSE3) )
return;
-// pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
-// pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
-// pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
+ pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
+ pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
+ pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
#else
pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_mmxext;
More information about the x264-devel
mailing list