[x265] [PATCH] asm: 10bpp fix for alignment in sse_ss routine

Murugan Vairavel murugan at multicorewareinc.com
Tue Dec 3 08:24:41 CET 2013


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1386053476 -19800
#      Tue Dec 03 12:21:16 2013 +0530
# Node ID 5c2fcf4dfc981de6ede28e6b205e0d27c6d4608d
# Parent  908b7918a1e488cf8f500a7767714277a82d368a
asm: 10bpp fix for alignment in sse_ss routine (use unaligned movu loads from r0; instantiate additional SSD_ONE block sizes)

diff -r 908b7918a1e4 -r 5c2fcf4dfc98 source/common/x86/ssd-a.asm
--- a/source/common/x86/ssd-a.asm	Tue Dec 03 12:15:52 2013 +0530
+++ b/source/common/x86/ssd-a.asm	Tue Dec 03 12:21:16 2013 +0530
@@ -77,10 +77,10 @@
 %endif
     pxor    m0, m0
 .loop
-    mova    m1, [r0]
-    mova    m2, [r0+offset0_1]
-    mova    m3, [r0+offset0_2]
-    mova    m4, [r0+offset0_3]
+    movu    m1, [r0]
+    movu    m2, [r0+offset0_1]
+    movu    m3, [r0+offset0_2]
+    movu    m4, [r0+offset0_3]
     psubw   m1, [r2]
     psubw   m2, [r2+offset1_1]
     psubw   m3, [r2+offset1_2]
@@ -122,8 +122,18 @@
 SSD_ONE     8,  4
 SSD_ONE     8,  8
 SSD_ONE     8, 16
+SSD_ONE     8, 32
+SSD_ONE    16,  4
 SSD_ONE    16,  8
+SSD_ONE    16, 12
 SSD_ONE    16, 16
+SSD_ONE    16, 32
+SSD_ONE    16, 64
+SSD_ONE    32,  8
+SSD_ONE    32, 16
+SSD_ONE    32, 24
+SSD_ONE    32, 32
+SSD_ONE    32, 64
 INIT_YMM avx2
 SSD_ONE    16,  8
 SSD_ONE    16, 16


More information about the x265-devel mailing list