[x265] [PATCH] asm: fix bug for invalid read in sa8d

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Tue May 20 16:27:07 CEST 2014


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1400595977 -19800
#      Tue May 20 19:56:17 2014 +0530
# Node ID 0d2ec86fa28bb3b59e5a4a477d16b11e7743431f
# Parent  a815df2b313f9a37623d90abe7610ebdcd0db5c8
asm: fix bug for invalid read in sa8d

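Fix an invalid read that occurred when the two input buffers were given
different stride values, and enable the 16x16 and 8x8 sa8d primitives.

After each SA8D_16x16 call the pointer rewind subtracted r4 (8*r1) from r2
and r5 (8*r3) from r0; the operands are now swapped so that r0 is rewound by
8*r1 and r2 by 8*r3.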

diff -r a815df2b313f -r 0d2ec86fa28b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue May 20 08:25:45 2014 -0500
+++ b/source/common/x86/asm-primitives.cpp	Tue May 20 19:56:17 2014 +0530
@@ -190,12 +190,14 @@
     p.sse_ss[LUMA_64x64]   = x265_pixel_ssd_ss_64x64_ ## cpu;
 
 #define SA8D_INTER_FROM_BLOCK(cpu) \
-    p.sa8d_inter[LUMA_4x8]  = x265_pixel_satd_4x8_ ## cpu; \
-    p.sa8d_inter[LUMA_8x4]  = x265_pixel_satd_8x4_ ## cpu; \
+    p.sa8d_inter[LUMA_4x8]   = x265_pixel_satd_4x8_ ## cpu; \
+    p.sa8d_inter[LUMA_8x4]   = x265_pixel_satd_8x4_ ## cpu; \
     p.sa8d_inter[LUMA_4x16]  = x265_pixel_satd_4x16_ ## cpu; \
     p.sa8d_inter[LUMA_16x4]  = x265_pixel_satd_16x4_ ## cpu; \
-    p.sa8d_inter[LUMA_12x16]  = x265_pixel_satd_12x16_ ## cpu; \
-    p.sa8d_inter[LUMA_16x12]  = x265_pixel_satd_16x12_ ## cpu; \
+    p.sa8d_inter[LUMA_12x16] = x265_pixel_satd_12x16_ ## cpu; \
+    p.sa8d_inter[LUMA_8x8]   = x265_pixel_sa8d_8x8_ ## cpu; \
+    p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_ ## cpu; \
+    p.sa8d_inter[LUMA_16x12] = x265_pixel_satd_16x12_ ## cpu; \
     p.sa8d_inter[LUMA_16x8]  = x265_pixel_sa8d_16x8_ ## cpu; \
     p.sa8d_inter[LUMA_8x16]  = x265_pixel_sa8d_8x16_ ## cpu; \
     p.sa8d_inter[LUMA_32x24] = x265_pixel_sa8d_32x24_ ## cpu; \
diff -r a815df2b313f -r 0d2ec86fa28b source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Tue May 20 08:25:45 2014 -0500
+++ b/source/common/x86/pixel-a.asm	Tue May 20 19:56:17 2014 +0530
@@ -2959,8 +2959,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3025,8 +3025,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3037,8 +3037,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3058,8 +3058,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3070,8 +3070,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3082,8 +3082,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3094,8 +3094,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3115,8 +3115,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3124,8 +3124,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3136,8 +3136,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3145,8 +3145,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3157,8 +3157,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3166,8 +3166,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3178,8 +3178,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3187,8 +3187,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3208,8 +3208,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3217,8 +3217,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3226,8 +3226,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3247,8 +3247,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3256,8 +3256,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3265,8 +3265,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3277,8 +3277,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3286,8 +3286,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3295,8 +3295,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3316,8 +3316,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3325,8 +3325,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3334,8 +3334,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3346,8 +3346,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3355,8 +3355,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3364,8 +3364,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3376,8 +3376,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3385,8 +3385,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3394,8 +3394,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3415,8 +3415,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3424,8 +3424,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3433,8 +3433,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3445,8 +3445,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3454,8 +3454,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3463,8 +3463,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3475,8 +3475,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3484,8 +3484,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3493,8 +3493,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     add  r2, 16*SIZEOF_PIXEL
     add  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3505,8 +3505,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3514,8 +3514,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]
@@ -3523,8 +3523,8 @@
     SA8D_16x16
     lea  r4, [8*r1]
     lea  r5, [8*r3]
-    sub  r2, r4
-    sub  r0, r5
+    sub  r0, r4
+    sub  r2, r5
     sub  r2, 16*SIZEOF_PIXEL
     sub  r0, 16*SIZEOF_PIXEL
     lea  r4, [3*r1]


More information about the x265-devel mailing list