[x265] [PATCH] asm: Fix sse_pp [64x64] for main10 sse2

Ramya Sriraman <ramya at multicorewareinc.com>
Fri Oct 23 11:40:37 CEST 2015


# HG changeset patch
# User Ramya Sriraman <ramya at multicorewareinc.com>
# Date 1445588795 -19800
#      Fri Oct 23 13:56:35 2015 +0530
# Node ID bb80fbf734b37937f76366654bfde320968d26e7
# Parent  a7251c3e0ef810b95bb25be5371035208e36996d
asm: Fix sse_pp [64x64] for main10 sse2

diff -r a7251c3e0ef8 -r bb80fbf734b3 source/common/x86/ssd-a.asm
--- a/source/common/x86/ssd-a.asm	Thu Oct 22 09:12:28 2015 +0530
+++ b/source/common/x86/ssd-a.asm	Fri Oct 23 13:56:35 2015 +0530
@@ -291,8 +291,19 @@
     lea     r2,  [r2 + r6]
     dec     r4d
     jnz  .loop
+%if BIT_DEPTH == 10 && %1 == 64 && %2 ==64
+    movu        m5, m0
+    pxor        m6, m6
+    punpckldq   m0, m6
+    punpckhdq   m5, m6
+    paddq       m0, m5
+    movhlps     m5, m0
+    paddq       m0, m5
+    movq        rax, xm0
+%else 
     HADDD   m0, m5
     movd   eax, xm0
+%endif
     RET
 %endmacro
 %macro SSD_24 2


More information about the x265-devel mailing list