[x265] [PATCH] asm: 16bpp asm code for intra_pred_ang4_2

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Mon Dec 9 08:46:35 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386575143 -19800
#      Mon Dec 09 13:15:43 2013 +0530
# Node ID 96841a72f275447825a266ad02cb1a50738513e0
# Parent  5bb46ef28bc59794404e59de5c62188928685437
asm: 16bpp asm code for intra_pred_ang4_2

diff -r 5bb46ef28bc5 -r 96841a72f275 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Dec 09 10:59:45 2013 +0800
+++ b/source/common/x86/asm-primitives.cpp	Mon Dec 09 13:15:43 2013 +0530
@@ -675,6 +675,9 @@
     {
         p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
         p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
+
+        SETUP_INTRA_ANG4(2, 2, ssse3);
+        SETUP_INTRA_ANG4(34, 2, ssse3);
     }
     if (cpuMask & X265_CPU_SSE4)
     {
diff -r 5bb46ef28bc5 -r 96841a72f275 source/common/x86/const-a.asm
--- a/source/common/x86/const-a.asm	Mon Dec 09 10:59:45 2013 +0800
+++ b/source/common/x86/const-a.asm	Mon Dec 09 13:15:43 2013 +0530
@@ -67,6 +67,7 @@
 
 const pd_1,        times 4 dd 1
 const pd_2,        times 4 dd 2
+const pd_16,       times 4 dd 16
 const pd_32,       times 4 dd 32
 const pd_64,       times 4 dd 64
 const pd_128,      times 4 dd 128
diff -r 5bb46ef28bc5 -r 96841a72f275 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Mon Dec 09 10:59:45 2013 +0800
+++ b/source/common/x86/intrapred16.asm	Mon Dec 09 13:15:43 2013 +0530
@@ -2,6 +2,7 @@
 ;* Copyright (C) 2013 x265 project
 ;*
 ;* Authors: Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
+;*          Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
 ;*
 ;* This program is free software; you can redistribute it and/or modify
 ;* it under the terms of the GNU General Public License as published by
@@ -26,9 +27,17 @@
 
 SECTION_RODATA 32
 
+const ang_table                        ; 32 entries (x = 0..31), 16 words = 32 bytes per entry
+%assign x 0                            ; x is the preprocessor counter used to generate the entries
+%rep 32
+    times 8 dw (32-x), x               ; entry x: word pair (32-x, x) repeated 8 times; NOTE(review): pair always sums to 32, presumably blend weights - confirm against users
+%assign x x+1
+%endrep
+
 SECTION .text
 
 cextern pw_1
+cextern pd_16
 cextern pd_32
 cextern pw_4096
 
@@ -398,3 +407,22 @@
 %endrep
 
     RET
+
+;-----------------------------------------------------------------------------
+; void intraPredAng(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
+;-----------------------------------------------------------------------------
+INIT_XMM ssse3                         ; XMM (128-bit) register set; palignr below requires SSSE3
+cglobal intra_pred_ang4_2, 3,3,4       ; 4x4 block copy-style predictor: r0 = dst, r1 = dstStride, r2 = refLeft; 3 args loaded, 3 GPRs, 4 XMM declared (m0-m2 used)
+    cmp               r4m, byte 34     ; is the 5th argument (dirMode) equal to 34?
+    cmove              r2, r3mp        ; yes: read from the 4th argument (refAbove) instead of refLeft
+    add                r1, r1          ; r1 *= 2; NOTE(review): presumably pixel stride -> byte stride for 16-bit pixels - confirm
+    movu               m0, [r2 + 4]    ; unaligned 16-byte load starting 4 bytes into the reference: words ref[2..9]
+    movh             [r0], m0          ; row 0 <- low 8 bytes of m0 = ref[2..5]
+    palignr            m1, m0, 2       ; low 8 bytes of m1 = m0 bytes 2..9 = ref[3..6]; upper half comes from old m1 and is never stored
+    movh        [r0 + r1], m1          ; row 1 <- ref[3..6]
+    palignr            m2, m0, 4       ; low 8 bytes of m2 = m0 bytes 4..11 = ref[4..7]; upper half is likewise unused
+    movh    [r0 + r1 * 2], m2          ; row 2 <- ref[4..7]
+    lea                r1, [r1 * 3]    ; r1 = 3 * (already doubled) stride; done only after the r1*2 address above was used
+    psrldq             m0, 6           ; shift m0 right by 6 bytes so its low 8 bytes = ref[5..8]; m0 is not needed afterwards
+    movh        [r0 + r1], m0          ; row 3 <- ref[5..8]
+    RET


More information about the x265-devel mailing list