[x265] [PATCH] asm: 16bpp asm code for intra_pred_ang4_2
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Mon Dec 9 08:46:35 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386575143 -19800
# Mon Dec 09 13:15:43 2013 +0530
# Node ID 96841a72f275447825a266ad02cb1a50738513e0
# Parent 5bb46ef28bc59794404e59de5c62188928685437
asm: 16bpp asm code for intra_pred_ang4_2
diff -r 5bb46ef28bc5 -r 96841a72f275 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Dec 09 10:59:45 2013 +0800
+++ b/source/common/x86/asm-primitives.cpp Mon Dec 09 13:15:43 2013 +0530
@@ -675,6 +675,9 @@
{
p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
+
+ SETUP_INTRA_ANG4(2, 2, ssse3);
+ SETUP_INTRA_ANG4(34, 2, ssse3);
}
if (cpuMask & X265_CPU_SSE4)
{
diff -r 5bb46ef28bc5 -r 96841a72f275 source/common/x86/const-a.asm
--- a/source/common/x86/const-a.asm Mon Dec 09 10:59:45 2013 +0800
+++ b/source/common/x86/const-a.asm Mon Dec 09 13:15:43 2013 +0530
@@ -67,6 +67,7 @@
const pd_1, times 4 dd 1
const pd_2, times 4 dd 2
+const pd_16, times 4 dd 16          ; four packed dwords, each 16 (presumably a rounding offset for a >>5 shift - confirm at use sites)
const pd_32, times 4 dd 32
const pd_64, times 4 dd 64
const pd_128, times 4 dd 128
diff -r 5bb46ef28bc5 -r 96841a72f275 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Mon Dec 09 10:59:45 2013 +0800
+++ b/source/common/x86/intrapred16.asm Mon Dec 09 13:15:43 2013 +0530
@@ -2,6 +2,7 @@
;* Copyright (C) 2013 x265 project
;*
;* Authors: Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
+;* Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
@@ -26,9 +27,17 @@
SECTION_RODATA 32
+const ang_table                     ; 32 entries of 32 bytes each, generated by the %rep below; not referenced by intra_pred_ang4_2
+%assign x 0
+%rep 32
+ times 8 dw (32-x), x               ; entry x: the word pair (32-x, x) repeated 8 times; presumably interpolation weights (confirm at use sites)
+%assign x x+1
+%endrep
+
SECTION .text
cextern pw_1
+cextern pd_16
cextern pd_32
cextern pw_4096
@@ -398,3 +407,22 @@
%endrep
RET
+
+;-----------------------------------------------------------------------------
+; void intraPredAng(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
+;-----------------------------------------------------------------------------
+INIT_XMM ssse3                      ; XMM registers, SSSE3 variant (palignr is an SSSE3 instruction)
+cglobal intra_pred_ang4_2, 3,3,4    ; load 3 args (r0=dst, r1=dstStride, r2=refLeft); 3 GPRs; 4 XMM declared, only m0-m2 used
+ cmp r4m, byte 34                   ; dirMode == 34 ?
+ cmove r2, r3mp                     ; if so, read from refAbove instead of refLeft (must stay before the flag-clobbering add)
+ add r1, r1                         ; double the stride: presumably pixels -> bytes for 2-byte pixels (confirm with caller)
+ movu m0, [r2 + 4]                  ; unaligned 16-byte load starting 4 bytes (two 16-bit pixels) into the reference: ref[2..9]
+ movh [r0], m0                      ; row 0: low 8 bytes = ref[2..5]
+ palignr m1, m0, 2                  ; low 8 bytes of m1 = m0 bytes 2..9 = ref[3..6]; upper half comes from old m1 and is never stored
+ movh [r0 + r1], m1                 ; row 1
+ palignr m2, m0, 4                  ; low 8 bytes of m2 = m0 bytes 4..11 = ref[4..7]; upper half likewise never stored
+ movh [r0 + r1 * 2], m2             ; row 2
+ lea r1, [r1 * 3]                   ; r1 = 3 * (doubled stride), the offset of row 3
+ psrldq m0, 6                       ; shift m0 right by 6 bytes: low 8 bytes = ref[5..8]
+ movh [r0 + r1], m0                 ; row 3
+ RET                                ; x86inc epilogue and return
More information about the x265-devel mailing list