[x264-devel] [Git][videolan/x264][master] 2 commits: x86: Fix clobbers for inline asm
Anton Mitrofanov
gitlab at videolan.org
Sun Oct 25 18:20:49 CET 2020
Anton Mitrofanov pushed to branch master at VideoLAN / x264
Commits:
f15ee064 by Anton Mitrofanov at 2020-10-25T18:10:15+01:00
x86: Fix clobbers for inline asm
- - - - -
37329c4f by Anton Mitrofanov at 2020-10-25T18:10:15+01:00
x86: Fix memory operands for inline asm
- - - - -
4 changed files:
- common/base.h
- common/x86/mc-c.c
- common/x86/predict-c.c
- common/x86/util.h
Changes:
=====================================
common/base.h
=====================================
@@ -76,6 +76,17 @@ typedef union { x264_uint128_t i; uint64_t q[2]; uint32_t d[4]; uint16_t w[8]; u
#define CP64(dst,src) M64(dst) = M64(src)
#define CP128(dst,src) M128(dst) = M128(src)
+/* Macros for memory constraints of inline asm */
+#if defined(__GNUC__) && __GNUC__ >= 8 && !defined(__clang__) && !defined(__INTEL_COMPILER)
+#define MEM_FIX(x, t, s) (*(t (*)[s])(x))
+#define MEM_DYN(x, t) (*(t (*)[])(x))
+#else
+// Older versions of GCC prefer casting to a structure instead of an array
+#define MEM_FIX(x, t, s) (*(struct { t a[s]; } (*))(x))
+// Let's set an arbitrarily large constant size
+#define MEM_DYN(x, t) MEM_FIX(x, t, 4096)
+#endif
+
/****************************************************************************
* Constants
****************************************************************************/
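
For readers following the patch, a quick illustration (not part of the commits): MEM_FIX hands the compiler an lvalue whose type spans exactly the s elements the asm touches, so a single "m" constraint describes the whole region, and MEM_DYN does the same for a region whose length isn't known at compile time; the pre-GCC-8 fallback wraps the elements in a struct instead, as the comment notes. A minimal usage sketch, assuming GCC extended asm on x86-64; sum4_s32() is a made-up helper, not x264 code:

#include <stdint.h>
#include <stdio.h>

/* Same shape as the new base.h macro (GCC >= 8 path). */
#define MEM_FIX(x, t, s) (*(t (*)[s])(x))

/* Horizontal sum of four int32_t with SSE2.  The MEM_FIX operand
 * declares all 16 bytes at v as read, not just v[0], and the clobber
 * list names the two registers the template overwrites. */
static int32_t sum4_s32( const int32_t *v )
{
    int32_t s;
    __asm__(
        "movdqu %1, %%xmm0            \n"
        "pshufd $0x4e, %%xmm0, %%xmm1 \n" /* swap the 64-bit halves */
        "paddd  %%xmm1, %%xmm0        \n"
        "pshufd $0xb1, %%xmm0, %%xmm1 \n" /* swap adjacent dwords   */
        "paddd  %%xmm1, %%xmm0        \n"
        "movd   %%xmm0, %0            \n"
        :"=r"(s)
        :"m"(MEM_FIX(v, const int32_t, 4))
        :"xmm0", "xmm1"
    );
    return s;
}

int main( void )
{
    int32_t v[4] = { 1, 2, 3, 4 };
    printf( "%d\n", sum4_s32( v ) ); /* prints 10 */
    return 0;
}
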
=====================================
common/x86/mc-c.c
=====================================
@@ -747,6 +747,7 @@ do\
"movd %%xmm0, %0 \n"\
:"+&r"(temp_s)\
:"r"(temp_x)\
+ :"xmm0", "xmm1"\
);\
s = temp_s;\
} while( 0 )
@@ -762,6 +763,7 @@ do\
"movd %%xmm0, %0 \n"\
:"+&r"(temp)\
:"m"(M32(x))\
+ :"xmm0", "xmm1"\
);\
(s)[0] = temp.w[0];\
(s)[1] = temp.w[1];\
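
A design note, not part of the patch: the alternative to hard-coding %%xmm0/%%xmm1 in the template and declaring them as clobbers is to let the compiler pick the SSE registers via "x"-constrained temporaries, in which case no xmm clobber list is needed at all. x264 keeps the named-register style, so the clobber additions above are the minimal correct fix; the following is only a sketch of the other pattern, assuming GCC extended asm on x86-64 and <emmintrin.h> for __m128i. byte_sum8() is a hypothetical helper, not x264 code:

#include <stdint.h>
#include <emmintrin.h> /* __m128i, _mm_setzero_si128, _mm_cvtsi128_si32 */

#define MEM_FIX(x, t, s) (*(t (*)[s])(x)) /* as in the new base.h */

/* Sum of 8 unsigned bytes via PSADBW against zero.  The compiler picks
 * which xmm registers back v0 and zero, so nothing is clobbered behind
 * its back; "&" marks v0 as written before the inputs are consumed. */
static uint32_t byte_sum8( const uint8_t *a )
{
    __m128i zero = _mm_setzero_si128();
    __m128i v0;
    __asm__(
        "movq   %2, %0 \n" /* load 8 bytes, upper half zeroed */
        "psadbw %1, %0 \n" /* abs-diff against 0 == byte sum  */
        :"=&x"(v0)
        :"x"(zero), "m"(MEM_FIX(a, const uint8_t, 8))
    );
    return (uint32_t)_mm_cvtsi128_si32( v0 );
}

The trade-off is that the template can no longer reference fixed register names, which the existing x264 macros do throughout, hence the explicit clobbers instead.
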
=====================================
common/x86/predict-c.c
=====================================
@@ -91,8 +91,11 @@ static void predict_16x16_p_##name( pixel *src )\
"paddd %%xmm1, %%xmm0 \n"\
"movd %%xmm0, %0 \n"\
:"=r"(H)\
- :"m"(src[-FDEC_STRIDE-1]), "m"(src[-FDEC_STRIDE+8]),\
- "m"(*pw_12345678), "m"(*pw_m87654321)\
+ :"m"(MEM_FIX(&src[-FDEC_STRIDE-1], const pixel, 8)),\
+ "m"(MEM_FIX(&src[-FDEC_STRIDE+8], const pixel, 8)),\
+ "m"(MEM_FIX(pw_12345678, const int16_t, 8)),\
+ "m"(MEM_FIX(pw_m87654321, const int16_t, 8))\
+ :"xmm0", "xmm1"\
);
#else // !HIGH_BIT_DEPTH
#define PREDICT_16x16_P_ASM\
@@ -110,8 +113,12 @@ static void predict_16x16_p_##name( pixel *src )\
"movd %%mm0, %0 \n"\
"movswl %w0, %0 \n"\
:"=r"(H)\
- :"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),\
- "m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)\
+ :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\
+ "m"(MEM_FIX(&src[-FDEC_STRIDE+8], const pixel, 8)),\
+ "m"(MEM_FIX(&src[-FDEC_STRIDE-8], const pixel, 8)),\
+ "m"(MEM_FIX(pb_12345678, const int8_t, 8)),\
+ "m"(MEM_FIX(pb_m87654321, const int8_t, 8))\
+ :"mm0", "mm1"\
);
#endif // HIGH_BIT_DEPTH
@@ -229,7 +236,9 @@ static void predict_8x8c_p_##name( pixel *src )\
"paddd %%xmm1, %%xmm0 \n"\
"movd %%xmm0, %0 \n"\
:"=r"(H)\
- :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)\
+ :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\
+ "m"(MEM_FIX(pw_m32101234, const int16_t, 8))\
+ :"xmm0", "xmm1"\
);
#else // !HIGH_BIT_DEPTH
#define PREDICT_8x8C_P_ASM\
@@ -243,7 +252,9 @@ static void predict_8x8c_p_##name( pixel *src )\
"movd %%mm0, %0 \n"\
"movswl %w0, %0 \n"\
:"=r"(H)\
- :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)\
+ :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\
+ "m"(MEM_FIX(pb_m32101234, const int8_t, 8))\
+ :"mm0", "mm1"\
);
#endif // HIGH_BIT_DEPTH
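
A side note on the second commit, not part of the patch: an operand like "m"(*pw_12345678) only declares the first element (one int16_t) as read, even though the asm loads all eight, so stores to the remaining elements could in principle be reordered around the asm or treated as dead. The MEM_FIX form declares the full extent. A small before/after sketch, assuming GCC extended asm on x86-64; copy8_s16() is hypothetical, not x264 code:

#include <stdint.h>

#define MEM_FIX(x, t, s) (*(t (*)[s])(x)) /* as in the new base.h */

/* Copies 8 int16_t through xmm0.
 *   old style:  "m"(*src), "=m"(*dst)            -> only element 0 of each
 *                                                   is visible to the compiler
 *   new style:  MEM_FIX(..., const int16_t, 8)   -> all 16 bytes are */
static void copy8_s16( int16_t *dst, const int16_t *src )
{
    __asm__(
        "movdqu %1, %%xmm0 \n"
        "movdqu %%xmm0, %0 \n"
        :"=m"(MEM_FIX(dst, int16_t, 8))
        :"m"(MEM_FIX(src, const int16_t, 8))
        :"xmm0"
    );
}
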
=====================================
common/x86/util.h
=====================================
@@ -56,6 +56,7 @@ static ALWAYS_INLINE void x264_median_mv_mmx2( int16_t *dst, int16_t *a, int16_t
"movd %%mm0, %0 \n"
:"=m"(*(x264_union32_t*)dst)
:"m"(M32( a )), "m"(M32( b )), "m"(M32( c ))
+ :"mm0", "mm1", "mm2", "mm3"
);
}
@@ -90,7 +91,8 @@ static ALWAYS_INLINE int x264_predictor_difference_mmx2( int16_t (*mvc)[2], intp
"paddd %%mm0, %%mm4 \n"
"movd %%mm4, %0 \n"
:"=r"(sum), "+r"(i_mvc)
- :"r"(mvc), "m"(M64( mvc )), "m"(pw_1)
+ :"r"(mvc), "m"(MEM_DYN( mvc, const int16_t )), "m"(pw_1)
+ :"mm0", "mm2", "mm3", "mm4", "cc"
);
return sum;
}
@@ -117,6 +119,7 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmx2(uint8_t *mvdleft, uint8_t
:"=r"(amvd)
:"m"(M16( mvdleft )),"m"(M16( mvdtop )),
"m"(pb_2),"m"(pb_32),"m"(pb_33)
+ :"mm0", "mm1", "mm2"
);
return amvd;
}
@@ -176,8 +179,9 @@ static ALWAYS_INLINE int x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*
"and $1, %k2 \n"
"sub %2, %4 \n" // output += !(mv == pmv || mv == 0)
"3: \n"
- :"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
- :"r"(dst), "g"(pmv), "m"(pd_32), "m"(M64( mvc ))
+ :"+r"(mvc), "=m"(MEM_DYN( dst, int16_t )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
+ :"r"(dst), "g"(pmv), "m"(pd_32), "m"(MEM_DYN( mvc, const int16_t ))
+ :"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "cc"
);
return i;
}
@@ -243,8 +247,9 @@ static ALWAYS_INLINE int x264_predictor_roundclip_mmx2( int16_t (*dst)[2], int16
"and $1, %k2 \n"
"sub %2, %4 \n"
"3: \n"
- :"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
- :"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(M64( mvc ))
+ :"+r"(mvc), "=m"(MEM_DYN( dst, int16_t )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
+ :"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(MEM_DYN( mvc, const int16_t ))
+ :"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "cc"
);
return i;
}
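
Again not part of the patch: MEM_DYN covers the cases above where the asm walks an array whose length is only known at run time (the mvc/dst arrays bounded by i_mvc), so neither a fixed size nor a blanket "memory" clobber is needed, and "cc" is listed because the loops modify the flags. A minimal sketch of the same operand shapes, assuming GCC extended asm on x86-64; zero_qwords() is hypothetical and requires n >= 1:

#include <stdint.h>

#define MEM_DYN(x, t) (*(t (*)[])(x)) /* as in the new base.h (GCC >= 8 path) */

/* Zeroes n 64-bit words with a tiny asm loop.  The "=m" operand covers
 * the whole unknown-length array, so no "memory" clobber is required;
 * dec/jnz change the flags, hence "cc". */
static void zero_qwords( uint64_t *dst, intptr_t n )
{
    intptr_t i = n; /* n >= 1 */
    __asm__(
        "1:                   \n"
        "movq $0, -8(%2,%1,8) \n" /* dst[i-1] = 0 */
        "dec  %1              \n"
        "jnz  1b              \n"
        :"=m"(MEM_DYN(dst, uint64_t)), "+r"(i)
        :"r"(dst)
        :"cc"
    );
}

On x86 the flags are commonly assumed clobbered by asm anyway, but listing "cc" explicitly, as the patch does, is harmless and documents the intent.
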
View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/7ab4c928ef4511ea5753a36a57c3506d9fd5086b...37329c4f103327b6d306c8148c79d9658419231b