[x265] [PATCH] copy_cnt 4x4 AVX2 asm code, as per new interface

Praveen Tiwari (praveen at multicorewareinc.com)
Fri Sep 5 11:44:53 CEST 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1409910275 -19800
# Node ID 1a5956edc730c3f45982e94ecb6694bb613f63f8
# Parent  a2993470dd1aaca47ae1c852ae73675e5353df6d
copy_cnt 4x4 AVX2 asm code, as per new interface

diff -r a2993470dd1a -r 1a5956edc730 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Fri Sep 05 11:39:20 2014 +0530
+++ b/source/common/x86/blockcopy8.asm	Fri Sep 05 15:14:35 2014 +0530
@@ -3991,23 +3991,23 @@
 INIT_YMM avx2
 cglobal copy_cnt_4, 3,3,5
     add         r2d, r2d
-    pxor        m4, m4
+    xorpd       xm3, xm3
 
     ; row 0 & 1
     movq        xm0, [r1]
     movhps      xm0, [r1 + r2]
-    pmovsxwd    m1, xm0
-    movu        [r0 + 0 * mmsize], m1
 
     ; row 2 & 3
     movq        xm1, [r1 + r2 * 2]
     lea         r2, [r2 * 3]
     movhps      xm1, [r1 + r2]
-    pmovsxwd    m2, xm1
-    movu        [r0 + 1 * mmsize], m2
-
-    packsswb    xm0, xm1
-    pcmpeqb     xm0, xm4
+
+    vinserti128 m0, m0, xm1, 1
+    movu    [r0], m0
+
+    vextractf128 xm1, m0, 1
+    packsswb     xm0, xm1
+    pcmpeqb      xm0, xm3
 
     ; get count
     pmovmskb    eax, xm0


More information about the x265-devel mailing list