[x264-devel] commit: MMX CABAC mvd sum calculation (Jason Garrett-Glaser )

git version control git at videolan.org
Sat Jun 20 01:28:59 CEST 2009


x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Fri Jun 19 16:03:18 2009 -0700| [ab85c9b0ae08a237472bfd14558353d5ecb92b3d] | committer: Jason Garrett-Glaser 

MMX CABAC mvd sum calculation
Faster CABAC mvd coding.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=ab85c9b0ae08a237472bfd14558353d5ecb92b3d
---

 common/common.h   |    9 +++++++++
 common/x86/util.h |   29 +++++++++++++++++++++++++++++
 encoder/cabac.c   |   12 ++++++------
 3 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/common/common.h b/common/common.h
index aa944f3..1e46ae8 100644
--- a/common/common.h
+++ b/common/common.h
@@ -143,6 +143,15 @@ static inline int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc )
     return sum;
 }
 
+static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop ) /* plain-C version: classify |left|+|top| per mvd component and pack both results */
+{
+    int amvd0 = abs(mvdleft[0]) + abs(mvdtop[0]); /* summed magnitude of component 0 of both neighbours */
+    int amvd1 = abs(mvdleft[1]) + abs(mvdtop[1]); /* summed magnitude of component 1 of both neighbours */
+    amvd0 = (amvd0 > 2) + (amvd0 > 32); /* 0 when sum <= 2, 1 when 3..32, 2 when > 32 */
+    amvd1 = (amvd1 > 2) + (amvd1 > 32); /* same three-way classification for component 1 */
+    return amvd0 + (amvd1<<16); /* component 0 result in the low 16 bits, component 1 result in the high 16 bits */
+}
+
 /****************************************************************************
  *
  ****************************************************************************/
diff --git a/common/x86/util.h b/common/x86/util.h
index f176cf2..ab1e208 100644
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -107,6 +107,35 @@ static ALWAYS_INLINE int array_non_zero_int_mmx( void *v, int i_count )
     }
     else return array_non_zero_int_c( v, i_count );
 }
+#define x264_cabac_amvd_sum x264_cabac_amvd_sum_mmxext /* after this point the name x264_cabac_amvd_sum expands to the MMXEXT version */
+static ALWAYS_INLINE uint32_t x264_cabac_amvd_sum_mmxext(int16_t *mvdleft, int16_t *mvdtop) /* computes both per-component results at once; packed as low/high 16-bit halves of the return value */
+{
+    static const uint64_t pw_2    = 0x0002000200020002ULL; /* four 16-bit words, each 2: upper clamp */
+    static const uint64_t pw_28   = 0x001C001C001C001CULL; /* four 16-bit words, each 28: bias added before the multiply */
+    static const uint64_t pw_2184 = 0x0888088808880888ULL; /* four 16-bit words, each 2184: fixed-point multiplier */
+    /* MIN(((x+28)*2184)>>16,2) = (x>2) + (x>32) */
+    /* 2184 = fix16(1/30) */
+    uint32_t amvd;
+    asm(
+        "movd      %1, %%mm0 \n" /* mm0 = 32 bits read at mvdleft (two int16 components) */
+        "movd      %2, %%mm1 \n" /* mm1 = 32 bits read at mvdtop (two int16 components) */
+        "pxor   %%mm2, %%mm2 \n" /* mm2 = 0 */
+        "pxor   %%mm3, %%mm3 \n" /* mm3 = 0 */
+        "psubw  %%mm0, %%mm2 \n" /* mm2 = -left, per word */
+        "psubw  %%mm1, %%mm3 \n" /* mm3 = -top, per word */
+        "pmaxsw %%mm2, %%mm0 \n" /* mm0 = max(left, -left) = |left| */
+        "pmaxsw %%mm3, %%mm1 \n" /* mm1 = max(top, -top) = |top| */
+        "paddw     %3, %%mm0 \n" /* mm0 = |left| + 28 */
+        "paddw  %%mm1, %%mm0 \n" /* mm0 = |left| + |top| + 28 */
+        "pmulhuw   %4, %%mm0 \n" /* mm0 = (mm0 * 2184) >> 16, unsigned, per word */
+        "pminsw    %5, %%mm0 \n" /* mm0 = min(mm0, 2), per word */
+        "movd   %%mm0, %0    \n" /* amvd = low 32 bits of mm0 (both component results) */
+        :"=r"(amvd)
+        :"m"(*(uint32_t*)mvdleft),"m"(*(uint32_t*)mvdtop), /* %1, %2: each mvd pair read as a single 32-bit memory operand */
+         "m"(pw_28),"m"(pw_2184),"m"(pw_2) /* %3, %4, %5: the constant vectors above */
+    );
+    return amvd; /* NOTE(review): no emms and no mm-register clobbers are declared here; presumably handled by callers/project convention — confirm */
+}
 #endif
 
 #endif
diff --git a/encoder/cabac.c b/encoder/cabac.c
index ac6827c..97defa0 100644
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -390,14 +390,11 @@ static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx
     x264_cabac_encode_decision( cb, 54 + ctx, 0 );
 }
 
-static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd )
+static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
 {
     static const uint8_t ctxes[9] = { 0,3,4,5,6,6,6,6,6 };
-    const int amvd = abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 1][l] ) +
-                     abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 8][l] );
     const int i_abs = abs( mvd );
     const int ctxbase = l ? 47 : 40;
-    int ctx = (amvd>2) + (amvd>32);
     int i;
 
     if( i_abs == 0 )
@@ -443,16 +440,19 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis
 static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width, int height )
 {
     DECLARE_ALIGNED_4( int16_t mvp[2] );
+    uint32_t amvd; /* packed per-component ctx values returned by x264_cabac_amvd_sum */
     int mdx, mdy;
 
     /* Calculate mvd */
     x264_mb_predict_mv( h, i_list, idx, width, mvp );
     mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0];
     mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1];
+    amvd = x264_cabac_amvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1], /* cached mvd at scan8 offset -1, passed as mvdleft */
+                               h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]); /* cached mvd at scan8 offset -8, passed as mvdtop */
 
     /* encode */
-    x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx );
-    x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy );
+    x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFFFF ); /* low 16 bits: ctx for component 0 */
+    x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>16 ); /* high 16 bits: ctx for component 1 */
 
     return pack16to32_mask(mdx,mdy);
 }



More information about the x264-devel mailing list