[x265] [PATCH] asm code for pixeladd_ps_4x4 and testbench integration
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Wed Nov 20 12:45:24 CET 2013
# HG changeset patch
# User Praveen Tiwari
# Date 1384947915 -19800
# Node ID c1e556f54d61422d153ff67f4830dc62dd1111d9
# Parent a7fb47a7eddf18634449a5ac898f7c2d029048e9
asm code for pixeladd_ps_4x4 and testbench integration
diff -r a7fb47a7eddf -r c1e556f54d61 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Wed Nov 20 12:57:57 2013 +0530
+++ b/source/common/CMakeLists.txt Wed Nov 20 17:15:15 2013 +0530
@@ -113,7 +113,7 @@
if(ENABLE_PRIMITIVES_ASM)
set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h)
- set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm blockcopy8.asm)
+ set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm blockcopy8.asm pixeladd8.asm)
if (NOT X64)
set(A_SRCS ${A_SRCS} pixel-32.asm)
endif()
diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Nov 20 12:57:57 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Nov 20 17:15:15 2013 +0530
@@ -633,6 +633,13 @@
p.calcrecon[BLOCK_32x32] = x265_calcRecons32_sse4;
p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse4;
p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4;
+
+ // This function pointer initialization is temporary and will be replaced
+ // later by macro definitions. It avoids linker errors until all partitions
+ // are coded, and allows the work to be committed as smaller patches that
+ // are easier to review.
+
+ p.chroma_add_ps[X265_CSP_I420][CHROMA_4x4] = x265_pixel_add_ps_4x4_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Wed Nov 20 12:57:57 2013 +0530
+++ b/source/common/x86/pixel.h Wed Nov 20 17:15:15 2013 +0530
@@ -313,7 +313,8 @@
SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 32, cpu);
#define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) \
- void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
+ void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);\
+ void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel *dest, int destride, pixel *src0, int16_t *scr1, int srcStride0, int srcStride1);
#define LUMA_PIXELSUB_DEF(cpu) \
SETUP_LUMA_PIXELSUB_PS_FUNC(4, 4, cpu); \
@@ -342,6 +343,8 @@
SETUP_LUMA_PIXELSUB_PS_FUNC(64, 16, cpu); \
SETUP_LUMA_PIXELSUB_PS_FUNC(16, 64, cpu);
+// Declared via LUMA_PIXELSUB_DEF(_sse4): void x265_pixel_add_ps_4x4_sse4(pixel *dest, int destride, pixel *src0, int16_t *scr1, int srcStride0, int srcStride1);
+
CHROMA_PIXELSUB_DEF(_sse4);
LUMA_PIXELSUB_DEF(_sse4);
diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/pixeladd8.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/x86/pixeladd8.asm Wed Nov 20 17:15:15 2013 +0530
@@ -0,0 +1,79 @@
+;*****************************************************************************
+;* Copyright (C) 2013 x265 project
+;*
+;* Authors: Praveen Kumar Tiwari <praveen at multicorewareinc.com>
+;*
+;* This program is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* This program is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+;*
+;* This program is also available under a commercial proprietary license.
+;* For more information, contact us at licensing at multicorewareinc.com.
+;*****************************************************************************/
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA 32
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; void pixel_add_ps_4x4(pixel *dest, int destride, pixel *src0, int16_t *scr1, int srcStride0, int srcStride1)
+; Adds a 4x4 block of int16_t values (scr1) to a 4x4 block of 8-bit pixels (src0) and
+; stores the sums, saturated to 0..255, in dest. srcStride1 is counted in int16_t units.
+;-----------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal pixel_add_ps_4x4, 6, 6, 2, dest, destride, src0, scr1, srcStride0, srcStride1
+; Strides are 32-bit int: sign-extend so r1/r4/r5 are valid 64-bit address offsets.
+movsxd r1, r1d
+movsxd r4, r4d
+movsxd r5, r5d
+add r5, r5 ; int16_t stride -> byte stride
+
+; row 0
+movd m0, [r2] ; load 4 pixels
+pmovzxbw m0, m0 ; zero-extend bytes to words
+movh m1, [r3] ; load 4 int16_t values
+paddw m0, m1
+packuswb m0, m0 ; saturate words to unsigned bytes
+movd [r0], m0 ; store 4 pixels
+
+; row 1
+movd m0, [r2 + r4]
+pmovzxbw m0, m0
+movh m1, [r3 + r5]
+paddw m0, m1
+packuswb m0, m0
+movd [r0 + r1], m0
+
+; row 2
+movd m0, [r2 + 2 * r4]
+pmovzxbw m0, m0
+movh m1, [r3 + 2 * r5]
+paddw m0, m1
+packuswb m0, m0
+movd [r0 + 2 * r1], m0
+
+; Advance all three pointers by two rows so that row 3 is reachable as base + stride.
+lea r0, [r0 + 2 * r1]
+lea r2, [r2 + 2 * r4]
+lea r3, [r3 + 2 * r5]
+; row 3
+movd m0, [r2 + r4]
+pmovzxbw m0, m0
+movh m1, [r3 + r5]
+paddw m0, m1
+packuswb m0, m0
+movd [r0 + r1], m0
+RET
More information about the x265-devel
mailing list